From: Junyan He <junyan...@linux.intel.com> The profilingProlog collects the information needed for profiling at the very beginning of the kernel, including the global ID range in X, Y and Z and the prolog timestamp.
Signed-off-by: Junyan He <junyan...@linux.intel.com> --- backend/src/backend/gen_context.cpp | 116 +++++++++++++++++++++++++++++++++++ backend/src/backend/gen_context.hpp | 2 + 2 files changed, 118 insertions(+) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 435b224..696d86a 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2185,6 +2185,119 @@ namespace gbe p->TYPED_WRITE(header, true, bti); } + /* Emits, for dim 0, 1 and 2, code that stores a start value at sub-register offset 8 + dim*8 of reg and an end value 4 further on. Each value is lsize * groupid + lid + goffset: start uses the toUniform view of lid, end uses the lid element read indirectly through a0, selected from the result of the compare on flag (flag, subFlag). tmp is scratch; the flag register and a0 are overwritten. */ void GenContext::calcGlobalXYZRange(GenRegister& reg, GenRegister& tmp, int flag, int subFlag) + { +#define CALC_GID(dim) do {\ + GenRegister g##dim##start = GenRegister::offset(reg, 0, 8 + dim*8); \ + GenRegister g##dim##end = GenRegister::offset(g##dim##start, 0, 4); \ + GenRegister id##dim = GenRegister::toUniform(ra->genReg(GenRegister::ud16grf(ir::ocl::lid##dim)), GEN_TYPE_UD); \ + GenRegister localsz##dim = GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::lsize##dim)), GEN_TYPE_UD); \ + GenRegister gid##dim = GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::groupid##dim)), GEN_TYPE_UD); \ + GenRegister goffset##dim = GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::goffset##dim)), GEN_TYPE_UD); \ + p->MUL(g##dim##start, localsz##dim, gid##dim); \ + p->ADD(g##dim##start, g##dim##start, id##dim); \ + p->ADD(g##dim##start, g##dim##start, goffset##dim); /* start = lsize * groupid + lid + goffset */ \ + GenRegister ip; \ + /* clear the flag, then compare the block IP against all-ones at full SIMD width (16 or 8) */ p->MOV(flagReg, GenRegister::immuw(0x0)); \ + p->curr.useFlag(flag, subFlag); \ + p->curr.predicate = GEN_PREDICATE_NONE; \ + if (this->simdWidth == 16) \ + p->curr.execWidth = 16; \ + else \ + p->curr.execWidth = 8; \ + if (!isDWLabel()) { \ + ip = ra->genReg(GenRegister::uw16grf(ir::ocl::blockip)); \ + p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immuw(0xffff)); \ + } else { \ + ip = ra->genReg(GenRegister::ud16grf(ir::ocl::dwblockip)); \ + p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immud(0xffffffff)); \ + } \ + p->curr.execWidth = 1; \ + 
/* copy the flag word into tmp and set every bit above the SIMD width before the FBL */ p->MOV(GenRegister::retype(tmp, GEN_TYPE_UW), flagReg); \ + if (this->simdWidth == 16) \ + p->OR(tmp, tmp, GenRegister::immud(0xffff0000)); \ + else \ + p->OR(tmp, tmp, GenRegister::immud(0xffffff00)); \ + p->FBL(tmp, tmp); \ + p->ADD(tmp, tmp, GenRegister::negate(GenRegister::immud(0x1))); \ + p->MUL(tmp, tmp, GenRegister::immud(4)); \ + p->MOV(GenRegister::addr1(0), GenRegister::retype(tmp, GEN_TYPE_UW)); /* a0 = (FBL result - 1) * 4, used just below to index id##dim indirectly */ \ + GenRegister dimEnd = GenRegister::to_indirect1xN(id##dim, 0); \ + p->MOV(tmp, dimEnd); \ + p->MUL(g##dim##end, localsz##dim, gid##dim); \ + p->ADD(g##dim##end, g##dim##end, tmp); \ + p->ADD(g##dim##end, g##dim##end, goffset##dim); /* end = lsize * groupid + indirectly selected lid + goffset */ \ +} while(0) + + GenRegister flagReg = GenRegister::flag(flag, subFlag); + p->push(); { + p->curr.execWidth = 1; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + CALC_GID(0); + CALC_GID(1); + CALC_GID(2); + } p->pop(); + +#undef CALC_GID + } + + /* Emits the profiling prolog: picks the profiling registers for SIMD16 or SIMD8, calls calcGlobalXYZRange on profilingReg4 with flag (0, 1), then copies ARF register 0xc0 (tmArf) into the prolog and lastTsReg slots with 4-wide UW moves and emits two NOPs. */ void GenContext::profilingProlog(void) { + // record the prolog, globalXYZ and lasttimestamp at the very beginning. 
+ GenRegister profilingReg2, profilingReg3, profilingReg4; + GenRegister tmArf = GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, + 0xc0, + 0, + GEN_TYPE_UW, + GEN_VERTICAL_STRIDE_4, + GEN_WIDTH_4, + GEN_HORIZONTAL_STRIDE_1); + if (this->simdWidth == 16) { + profilingReg2 = ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts1)); + profilingReg3 = GenRegister::offset(profilingReg2, 1); + profilingReg4 = ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts2)); + } else { + GBE_ASSERT(this->simdWidth == 8); + profilingReg2 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts2)); + profilingReg3 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts3)); + profilingReg4 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts4)); + } + + /* MOV(4) prolog<1>:UW arf_tm<4,4,1>:UW */ + /* MOV(4) lastTsReg<1>:UW arf_tm<4,4,1>:UW -- NOTE(review): the code below reads tmArf for this move too, not prolog; confirm which source is intended */ + GenRegister prolog = profilingReg2; + prolog.type = GEN_TYPE_UW; + prolog.hstride = GEN_HORIZONTAL_STRIDE_1; + prolog.vstride = GEN_VERTICAL_STRIDE_4; + prolog.width = GEN_WIDTH_4; + prolog = GenRegister::offset(prolog, 0, 4*sizeof(uint32_t)); + + GenRegister lastTsReg = GenRegister::toUniform(profilingReg3, GEN_TYPE_UL); + lastTsReg = GenRegister::offset(lastTsReg, 0, 2*sizeof(uint64_t)); + lastTsReg.type = GEN_TYPE_UW; + lastTsReg.hstride = GEN_HORIZONTAL_STRIDE_1; + lastTsReg.vstride = GEN_VERTICAL_STRIDE_4; + lastTsReg.width = GEN_WIDTH_4; + + GenRegister gids = GenRegister::toUniform(profilingReg4, GEN_TYPE_UD); + GenRegister tmp = GenRegister::toUniform(profilingReg4, GEN_TYPE_UD); /* tmp is built from the same profilingReg4 as gids; calcGlobalXYZRange stores its results at sub-offsets 8 and up of that register */ + + // X Y and Z + this->calcGlobalXYZRange(gids, tmp, 0, 1); + + p->push(); { + p->curr.execWidth = 4; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + p->MOV(prolog, tmArf); + p->MOV(lastTsReg, tmArf); + } p->pop(); + + p->NOP(); + p->NOP(); + return; + } + void GenContext::emitCalcTimestampInstruction(const SelectionInstruction &insn) { } @@ -2317,6 +2430,9 @@ namespace gbe schedulePostRegAllocation(*this, *this->sel); if (OCL_OUTPUT_REG_ALLOC) 
ra->outputAllocation(); + if (inProfilingMode) { // add the profiling prolog before do anything. + this->profilingProlog(); + } this->clearFlagRegister(); this->emitStackPointer(); this->emitSLMOffset(); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index bbd48cf..e36c8e6 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -86,6 +86,7 @@ namespace gbe /*! Simd width chosen for the current function */ INLINE uint32_t getSimdWidth(void) const { return simdWidth; } void clearFlagRegister(void); + void profilingProlog(void); /*! check the flag reg, if is grf, use f0.1 instead */ GenRegister checkFlagRegister(GenRegister flagReg); /*! Emit the per-lane stack pointer computation */ @@ -221,6 +222,7 @@ namespace gbe void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0); virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0); + void calcGlobalXYZRange(GenRegister& reg, GenRegister& tmp, int flag, int subFlag); private: CompileErrorCode errCode; -- 1.7.9.5 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet