get_sub_group_id ranges over [0, 7] for SIMD8 and [0, 15] for SIMD16. Previously we set up these values in the kernel payload; now we generate them inside the kernel using a packed integer vector.
v2: encapsulate into a function so that others can get the lane id easily. Signed-off-by: Guo Yejun <yejun....@intel.com> --- backend/src/backend/gen_context.cpp | 8 -------- backend/src/backend/gen_insn_selection.cpp | 28 ++++++++++++++++++++++++++-- backend/src/backend/program.h | 1 - backend/src/ir/profile.cpp | 1 - backend/src/ir/profile.hpp | 7 +++---- src/cl_command_queue_gen7.c | 8 -------- 6 files changed, 29 insertions(+), 24 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index e16b0a9..29b58df 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2217,13 +2217,8 @@ namespace gbe allocCurbeReg(reg, GBE_CURBE_##PATCH); \ } else - bool needLaneID = false; fn.foreachInstruction([&](ir::Instruction &insn) { const uint32_t srcNum = insn.getSrcNum(); - if (insn.getOpcode() == ir::OP_SIMD_ID) { - GBE_ASSERT(srcNum == 0); - needLaneID = true; - } for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const ir::Register reg = insn.getSrc(srcID); if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2262,9 +2257,6 @@ namespace gbe }); #undef INSERT_REG - if (needLaneID) - allocCurbeReg(laneid, GBE_CURBE_LANE_ID); - // After this point the vector is immutable. Sorting it will make // research faster std::sort(kernel->patches.begin(), kernel->patches.end()); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index b0ba9e3..598238d 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -401,6 +401,7 @@ namespace gbe return GenRegister::offset(reg, nr, subnr); } + GenRegister getLaneIDReg(); /*! Implement public class */ INLINE uint32_t getRegNum(void) const { return file.regNum(); } /*! 
Implements public interface */ @@ -1661,6 +1662,29 @@ namespace gbe insn->src(1) = src1; } + GenRegister Selection::Opaque::getLaneIDReg() + { + const GenRegister laneID = GenRegister::immv(0x76543210); + ir::Register r = reg(ir::RegisterFamily::FAMILY_WORD); + const GenRegister dst = selReg(r, ir::TYPE_U16); + + uint32_t execWidth = curr.execWidth; + if (execWidth == 8) + MOV(dst, laneID); + else { + push(); + curr.execWidth = 8; + curr.noMask = 1; + MOV(dst, laneID); + //Packed Unsigned Half-Byte Integer Vector does not work + //have to mock by adding 8 to the singed vector + const GenRegister eight = GenRegister::immuw(8); + ADD(GenRegister::offset(dst, 0, 16), dst, eight); + pop(); + } + return dst; + } + void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]) { SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2); insn->src(0) = src0; @@ -2299,8 +2323,8 @@ namespace gbe break; case ir::OP_SIMD_ID: { - const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32); - sel.MOV(dst, selLaneID); + GenRegister laneID = sel.getLaneIDReg(); + sel.MOV(dst, laneID); } break; default: NOT_SUPPORTED; diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index 3637ebb..56db1a1 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -101,7 +101,6 @@ enum gbe_curbe_type { GBE_CURBE_THREAD_NUM, GBE_CURBE_ZERO, GBE_CURBE_ONE, - GBE_CURBE_LANE_ID, GBE_CURBE_SLM_OFFSET, GBE_CURBE_BTI_UTIL, }; diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index af9f698..37f2d3d 100644 --- a/backend/src/ir/profile.cpp +++ b/backend/src/ir/profile.cpp @@ -90,7 +90,6 @@ namespace ir { DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1); DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1); DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0); - DECL_NEW_REG(FAMILY_DWORD, laneid, 0); DECL_NEW_REG(FAMILY_DWORD, invalid, 1); DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1); } diff --git a/backend/src/ir/profile.hpp 
b/backend/src/ir/profile.hpp index 9323824..bf909be 100644 --- a/backend/src/ir/profile.hpp +++ b/backend/src/ir/profile.hpp @@ -72,10 +72,9 @@ namespace ir { static const Register printfbptr = Register(28); // printf buffer address . static const Register printfiptr = Register(29); // printf index buffer address. static const Register dwblockip = Register(30); // blockip - static const Register laneid = Register(31); // lane id. - static const Register invalid = Register(32); // used for valid comparation. - static const Register btiUtil = Register(33); // used for mixed pointer as bti utility. - static const uint32_t regNum = 34; // number of special registers + static const Register invalid = Register(31); // used for valid comparation. + static const Register btiUtil = Register(32); // used for mixed pointer as bti utility. + static const uint32_t regNum = 33; // number of special registers extern const char *specialRegMean[]; // special register name. } /* namespace ocl */ diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 89f39b3..4adbd2b 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker, UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD - /* get_sub_group_id needs it */ - if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) { - const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque); - uint32_t *laneid = (uint32_t *) (ker->curbe + offset); - int32_t i; - for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i; - } - /* Write identity for the stack pointer. This is required by the stack pointer * computation in the kernel */ -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet