From: Pan Xiuli <xiuli....@intel.com> Use curbe register for these two size.
Signed-off-by: Pan Xiuli <xiuli....@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 7 +++-- backend/src/backend/program.h | 3 ++ backend/src/ir/profile.cpp | 4 +++ backend/src/ir/profile.hpp | 47 ++++++++++++++++-------------- backend/src/libocl/src/ocl_workitem.cl | 17 ++++------- backend/src/llvm/llvm_gen_backend.cpp | 6 ++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 3 ++ src/cl_command_queue_gen7.c | 6 +++- 8 files changed, 57 insertions(+), 36 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 90e15c3..77614b6 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -3423,8 +3423,11 @@ namespace gbe reg == ir::ocl::lid1 || reg == ir::ocl::lid2 || reg == ir::ocl::lsize0 || - reg == ir::ocl::lsize1|| - reg == ir::ocl::lsize2) + reg == ir::ocl::lsize1 || + reg == ir::ocl::lsize2 || + reg == ir::ocl::enqlsize0 || + reg == ir::ocl::enqlsize1 || + reg == ir::ocl::enqlsize2) return true; else return false; diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index 4dd3ae3..a690e3d 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -83,6 +83,9 @@ enum gbe_curbe_type { GBE_CURBE_LOCAL_SIZE_X, GBE_CURBE_LOCAL_SIZE_Y, GBE_CURBE_LOCAL_SIZE_Z, + GBE_CURBE_ENQUEUED_LOCAL_SIZE_X, + GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y, + GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z, GBE_CURBE_GLOBAL_SIZE_X, GBE_CURBE_GLOBAL_SIZE_Y, GBE_CURBE_GLOBAL_SIZE_Z, diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index ce5e8e7..0907d76 100644 --- a/backend/src/ir/profile.cpp +++ b/backend/src/ir/profile.cpp @@ -35,6 +35,7 @@ namespace ir { "group_id_0", "group_id_1", "group_id_2", "num_groups_0", "num_groups_1", "num_groups_2", "local_size_0", "local_size_1", "local_size_2", + "enqueued_local_size_0", "enqueued_local_size_1", "enqueued_local_size_2", "global_size_0", "global_size_1", "global_size_2", "global_offset_0", "global_offset_1", "global_offset_2", "stack_pointer", "stack_buffer", @@ -71,6 +72,9 @@ namespace ir { DECL_NEW_REG(FAMILY_DWORD, lsize0, 1, GBE_CURBE_LOCAL_SIZE_X); DECL_NEW_REG(FAMILY_DWORD, lsize1, 1, GBE_CURBE_LOCAL_SIZE_Y); DECL_NEW_REG(FAMILY_DWORD, lsize2, 1, GBE_CURBE_LOCAL_SIZE_Z); + DECL_NEW_REG(FAMILY_DWORD, enqlsize0, 1, GBE_CURBE_ENQUEUED_LOCAL_SIZE_X); + DECL_NEW_REG(FAMILY_DWORD, enqlsize1, 1, GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y); + DECL_NEW_REG(FAMILY_DWORD, enqlsize2, 1, GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z); DECL_NEW_REG(FAMILY_DWORD, gsize0, 1, GBE_CURBE_GLOBAL_SIZE_X); DECL_NEW_REG(FAMILY_DWORD, gsize1, 1, GBE_CURBE_GLOBAL_SIZE_Y); DECL_NEW_REG(FAMILY_DWORD, gsize2, 1, GBE_CURBE_GLOBAL_SIZE_Z); diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index 35b49e3..b093adf 100644 --- a/backend/src/ir/profile.hpp +++ b/backend/src/ir/profile.hpp @@ -53,28 +53,31 @@ namespace ir { static const Register lsize0 = Register(9); // get_local_size(0) static const Register lsize1 = Register(10); // get_local_size(1) static const Register lsize2 = Register(11); // get_local_size(2) - static const Register gsize0 = Register(12); // get_global_size(0) - static const Register gsize1 = Register(13); // get_global_size(1) - static const Register gsize2 = Register(14); // get_global_size(2) - static const Register goffset0 = Register(15); // get_global_offset(0) - static const Register goffset1 = Register(16); // get_global_offset(1) - static const Register goffset2 = Register(17); // get_global_offset(2) - static const Register stackptr = Register(18); // stack pointer - static const Register stackbuffer = Register(19); // stack buffer base address. - static const Register blockip = Register(20); // blockip - static const Register barrierid = Register(21);// barrierid - static const Register threadn = Register(22); // number of threads - static const Register workdim = Register(23); // work dimention. - static const Register zero = Register(24); // scalar register holds zero. - static const Register one = Register(25); // scalar register holds one. - static const Register retVal = Register(26); // helper register to do data flow analysis. - static const Register printfbptr = Register(27); // printf buffer address . - static const Register printfiptr = Register(28); // printf index buffer address. - static const Register dwblockip = Register(29); // blockip - static const Register threadid = Register(30); // the thread id of this thread. - static const Register constant_addrspace = Register(31); // starting address of program-scope constant - static const Register stacksize = Register(32); // stack buffer total size - static const uint32_t regNum = 33; // number of special registers + static const Register enqlsize0 = Register(12); // get_local_size(0) + static const Register enqlsize1 = Register(13); // get_local_size(1) + static const Register enqlsize2 = Register(14); // get_local_size(2) + static const Register gsize0 = Register(15); // get_global_size(0) + static const Register gsize1 = Register(16); // get_global_size(1) + static const Register gsize2 = Register(17); // get_global_size(2) + static const Register goffset0 = Register(18); // get_global_offset(0) + static const Register goffset1 = Register(19); // get_global_offset(1) + static const Register goffset2 = Register(20); // get_global_offset(2) + static const Register stackptr = Register(21); // stack pointer + static const Register stackbuffer = Register(22); // stack buffer base address. + static const Register blockip = Register(23); // blockip + static const Register barrierid = Register(24);// barrierid + static const Register threadn = Register(25); // number of threads + static const Register workdim = Register(26); // work dimention. + static const Register zero = Register(27); // scalar register holds zero. + static const Register one = Register(28); // scalar register holds one. + static const Register retVal = Register(29); // helper register to do data flow analysis. + static const Register printfbptr = Register(30); // printf buffer address . + static const Register printfiptr = Register(31); // printf index buffer address. + static const Register dwblockip = Register(32); // blockip + static const Register threadid = Register(33); // the thread id of this thread. + static const Register constant_addrspace = Register(34); // starting address of program-scope constant + static const Register stacksize = Register(35); // stack buffer total size + static const uint32_t regNum = 36; // number of special registers extern const char *specialRegMean[]; // special register name. } /* namespace ocl */ diff --git a/backend/src/libocl/src/ocl_workitem.cl b/backend/src/libocl/src/ocl_workitem.cl index 235f12b..dc8fa6d 100644 --- a/backend/src/libocl/src/ocl_workitem.cl +++ b/backend/src/libocl/src/ocl_workitem.cl @@ -30,6 +30,7 @@ PURE CONST unsigned int __gen_ocl_##NAME##1(void); \ PURE CONST unsigned int __gen_ocl_##NAME##2(void); DECL_INTERNAL_WORK_ITEM_FN(get_group_id) DECL_INTERNAL_WORK_ITEM_FN(get_local_id) +DECL_INTERNAL_WORK_ITEM_FN(get_enqueued_local_size) DECL_INTERNAL_WORK_ITEM_FN(get_local_size) DECL_INTERNAL_WORK_ITEM_FN(get_global_size) DECL_INTERNAL_WORK_ITEM_FN(get_global_offset) @@ -46,6 +47,7 @@ OVERLOADABLE unsigned NAME(unsigned int dim) { \ DECL_PUBLIC_WORK_ITEM_FN(get_group_id, 0) DECL_PUBLIC_WORK_ITEM_FN(get_local_id, 0) +DECL_PUBLIC_WORK_ITEM_FN(get_enqueued_local_size, 1) DECL_PUBLIC_WORK_ITEM_FN(get_local_size, 1) DECL_PUBLIC_WORK_ITEM_FN(get_global_size, 1) DECL_PUBLIC_WORK_ITEM_FN(get_global_offset, 0) @@ -53,14 +55,7 @@ DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1) #undef DECL_PUBLIC_WORK_ITEM_FN OVERLOADABLE uint get_global_id(uint dim) { - return get_local_id(dim) + get_local_size(dim) * get_group_id(dim) + get_global_offset(dim); -} - -OVERLOADABLE uint get_enqueued_local_size (uint dimindx) -{ - //TODO: should be different with get_local_size when support - //non-uniform work-group size - return get_local_size(dimindx); + return get_local_id(dim) + get_enqueued_local_size(dim) * get_group_id(dim) + get_global_offset(dim); } OVERLOADABLE uint get_global_linear_id(void) @@ -80,8 +75,8 @@ OVERLOADABLE uint get_local_linear_id(void) { uint dim = __gen_ocl_get_work_dim(); if (dim == 1) return get_local_id(0); - else if (dim == 2) return get_local_id(1) * get_local_size (0) + get_local_id(0); - else if (dim == 3) return (get_local_id(2) * get_local_size(1) * get_local_size(0)) + - (get_local_id(1) * get_local_size(0)) + get_local_id(0); + else if (dim == 2) return get_local_id(1) * get_enqueued_local_size(0) + get_local_id(0); + else if (dim == 3) return (get_local_id(2) * get_enqueued_local_size(1) * get_local_size(0)) + + (get_local_id(1) * get_enqueued_local_size(0)) + get_local_id(0); else return 0; } diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index cca1781..4a98678 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -3507,6 +3507,12 @@ namespace gbe regTranslator.newScalarProxy(ir::ocl::lsize1, dst); break; case GEN_OCL_GET_LOCAL_SIZE2: regTranslator.newScalarProxy(ir::ocl::lsize2, dst); break; + case GEN_OCL_GET_ENQUEUED_LOCAL_SIZE0: + regTranslator.newScalarProxy(ir::ocl::enqlsize0, dst); break; + case GEN_OCL_GET_ENQUEUED_LOCAL_SIZE1: + regTranslator.newScalarProxy(ir::ocl::enqlsize1, dst); break; + case GEN_OCL_GET_ENQUEUED_LOCAL_SIZE2: + regTranslator.newScalarProxy(ir::ocl::enqlsize2, dst); break; case GEN_OCL_GET_GLOBAL_SIZE0: regTranslator.newScalarProxy(ir::ocl::gsize0, dst); break; case GEN_OCL_GET_GLOBAL_SIZE1: diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 7bd59fc..09feb1a 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -10,6 +10,9 @@ DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS2, __gen_ocl_get_num_groups2) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE0, __gen_ocl_get_local_size0) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE1, __gen_ocl_get_local_size1) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE2, __gen_ocl_get_local_size2) +DECL_LLVM_GEN_FUNCTION(GET_ENQUEUED_LOCAL_SIZE0, __gen_ocl_get_enqueued_local_size0) +DECL_LLVM_GEN_FUNCTION(GET_ENQUEUED_LOCAL_SIZE1, __gen_ocl_get_enqueued_local_size1) +DECL_LLVM_GEN_FUNCTION(GET_ENQUEUED_LOCAL_SIZE2, __gen_ocl_get_enqueued_local_size2) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE0, __gen_ocl_get_global_size0) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE1, __gen_ocl_get_global_size1) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE2, __gen_ocl_get_global_size2) diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 38cf56b..6bfacbf 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -221,6 +221,7 @@ cl_curbe_fill(cl_kernel ker, const size_t *global_wk_off, const size_t *global_wk_sz, const size_t *local_wk_sz, + const size_t *enqueued_local_wk_sz, size_t thread_n) { int32_t offset; @@ -230,6 +231,9 @@ cl_curbe_fill(cl_kernel ker, UPLOAD(GBE_CURBE_LOCAL_SIZE_X, local_wk_sz[0]); UPLOAD(GBE_CURBE_LOCAL_SIZE_Y, local_wk_sz[1]); UPLOAD(GBE_CURBE_LOCAL_SIZE_Z, local_wk_sz[2]); + UPLOAD(GBE_CURBE_ENQUEUED_LOCAL_SIZE_X, enqueued_local_wk_sz[0]); + UPLOAD(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y, enqueued_local_wk_sz[1]); + UPLOAD(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z, enqueued_local_wk_sz[2]); UPLOAD(GBE_CURBE_GLOBAL_SIZE_X, global_wk_sz[0]); UPLOAD(GBE_CURBE_GLOBAL_SIZE_Y, global_wk_sz[1]); UPLOAD(GBE_CURBE_GLOBAL_SIZE_Z, global_wk_sz[2]); @@ -374,7 +378,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, } /* Curbe step 1: fill the constant urb buffer data shared by all threads */ if (ker->curbe) { - kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n); + kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz,local_wk_sz ,local_wk_sz, thread_n); if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) { fprintf(stderr, "Beignet: Out of shared local memory %d.\n", kernel.slm_sz); return CL_OUT_OF_RESOURCES; -- 2.5.0 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet