Add uint __gen_ocl_get_simd_id(); its return value ranges from 0 to simdsize - 1. V2: use the function sel.selReg to refine the code. Signed-off-by: Guo Yejun <yejun....@intel.com> --- backend/src/backend/gen_context.cpp | 9 ++++++++- backend/src/backend/gen_insn_selection.cpp | 6 ++++++ backend/src/backend/program.h | 1 + backend/src/ir/instruction.cpp | 1 + backend/src/ir/instruction.hpp | 2 ++ backend/src/ir/instruction.hxx | 1 + backend/src/ir/liveness.cpp | 5 +++++ backend/src/ir/profile.cpp | 2 ++ backend/src/ir/profile.hpp | 5 +++-- backend/src/libocl/tmpl/ocl_simd.tmpl.h | 1 + backend/src/llvm/llvm_gen_backend.cpp | 7 +++++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 1 + src/cl_command_queue_gen7.c | 8 ++++++++ 13 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 684ecaf..62fd596 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2013,9 +2013,14 @@ namespace gbe if (curbeRegs.find(reg) != curbeRegs.end()) continue; \ allocCurbeReg(reg, GBE_CURBE_##PATCH); \ } else - + + bool needLaneID = false; fn.foreachInstruction([&](ir::Instruction &insn) { const uint32_t srcNum = insn.getSrcNum(); + if (insn.getOpcode() == ir::OP_SIMD_ID) { + GBE_ASSERT(srcNum == 0); + needLaneID = true; + } for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const ir::Register reg = insn.getSrc(srcID); if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2054,6 +2059,8 @@ namespace gbe }); #undef INSERT_REG + if (needLaneID) + allocCurbeReg(laneid, GBE_CURBE_LANE_ID); // After this point the vector is immutable. Sorting it will make // research faster diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 026a858..19a3c24 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2137,6 +2137,12 @@ namespace gbe sel.MOV(dst, src); } break; + case ir::OP_SIMD_ID: + { + const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32); + sel.MOV(dst, selLaneID); + } + break; default: NOT_SUPPORTED; } sel.pop(); diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index 554fb16..8c171f5 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -101,6 +101,7 @@ enum gbe_curbe_type { GBE_CURBE_THREAD_NUM, GBE_CURBE_ZERO, GBE_CURBE_ONE, + GBE_CURBE_LANE_ID, GBE_CURBE_SLM_OFFSET, }; diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 86148bc..7723b90 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1614,6 +1614,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex } 
DECL_EMIT_FUNCTION(SIMD_SIZE) + DECL_EMIT_FUNCTION(SIMD_ID) #undef DECL_EMIT_FUNCTION diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index c603d9e..436bfd2 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -572,6 +572,8 @@ namespace ir { Instruction ALU0(Opcode opcode, Type type, Register dst); /*! simd_size.type dst */ Instruction SIMD_SIZE(Type type, Register dst); + /*! simd_id.type dst */ + Instruction SIMD_ID(Type type, Register dst); /*! alu1.type dst src */ Instruction ALU1(Opcode opcode, Type type, Register dst, Register src); /*! mov.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index f86cfbb..3f08a92 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -26,6 +26,7 @@ * \author Benjamin Segovia <benjamin.sego...@intel.com> */ DECL_INSN(SIMD_SIZE, NullaryInstruction) +DECL_INSN(SIMD_ID, NullaryInstruction) DECL_INSN(MOV, UnaryInstruction) DECL_INSN(COS, UnaryInstruction) DECL_INSN(SIN, UnaryInstruction) diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index 2b1ffdb..26c4129 100644 --- a/backend/src/ir/liveness.cpp +++ b/backend/src/ir/liveness.cpp @@ -66,6 +66,11 @@ namespace ir { const uint32_t srcNum = insn.getSrcNum(); const uint32_t dstNum = insn.getDstNum(); bool uniform = true; + + //have no way to decide the dst uniform if there is no source + if (srcNum == 0) + uniform = false; + for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const Register reg = insn.getSrc(srcID); if (!fn.isUniformRegister(reg)) diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index ec7ab94..2f6539a 100644 --- a/backend/src/ir/profile.cpp +++ b/backend/src/ir/profile.cpp @@ -44,6 +44,7 @@ namespace ir { "retVal", "slm_offset", "printf_buffer_pointer", "printf_index_buffer_pointer", "dwblockip", + "lane_id", "invalid" }; @@ -88,6 +89,7 @@ namespace ir { DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1); 
DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1); DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0); + DECL_NEW_REG(FAMILY_DWORD, laneid, 0); DECL_NEW_REG(FAMILY_DWORD, invalid, 1); } #undef DECL_NEW_REG diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index 8f69320..4de6fe0 100644 --- a/backend/src/ir/profile.hpp +++ b/backend/src/ir/profile.hpp @@ -72,8 +72,9 @@ namespace ir { static const Register printfbptr = Register(28); // printf buffer address . static const Register printfiptr = Register(29); // printf index buffer address. static const Register dwblockip = Register(30); // blockip - static const Register invalid = Register(31); // used for valid comparation. - static const uint32_t regNum = 32; // number of special registers + static const Register laneid = Register(31); // lane id. + static const Register invalid = Register(32); // used for valid comparation. + static const uint32_t regNum = 33; // number of special registers extern const char *specialRegMean[]; // special register name. 
} /* namespace ocl */ diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h index b992902..620e329 100644 --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h @@ -25,3 +25,4 @@ ///////////////////////////////////////////////////////////////////////////// uint __gen_ocl_get_simd_size(void); +uint __gen_ocl_get_simd_id(void); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index ac67add..f46bc79 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2806,6 +2806,7 @@ namespace gbe case GEN_OCL_SIMD_SIZE: case GEN_OCL_READ_TM: case GEN_OCL_REGION: + case GEN_OCL_SIMD_ID: this->newRegister(&I); break; case GEN_OCL_PRINTF: @@ -3461,6 +3462,12 @@ namespace gbe ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst); break; } + case GEN_OCL_SIMD_ID: + { + const ir::Register dst = this->getRegister(&I); + ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst); + break; + } default: break; } } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 2b151f2..e2bffde 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -155,6 +155,7 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all) DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size) +DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id) DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 4adbd2b..e27a211 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -210,6 +210,14 @@ cl_curbe_fill(cl_kernel ker, UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD + /* __gen_ocl_get_simd_id 
needs it */ + if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) { + const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque); + uint32_t *laneid = (uint32_t *) (ker->curbe + offset); + int32_t i; + for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i; + } + /* Write identity for the stack pointer. This is required by the stack pointer * computation in the kernel */ -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet