When a kernel has __attribute__((reqd_work_group_size(X, Y, Z))) qualifier, the kernel will only accept that group size.
Signed-off-by: Zhigang Gong <zhigang.g...@intel.com> --- backend/src/backend/program.cpp | 9 +++++++++ backend/src/backend/program.h | 4 ++++ backend/src/backend/program.hpp | 13 +++++++++++++ backend/src/ir/function.hpp | 10 ++++++++-- backend/src/llvm/llvm_gen_backend.cpp | 26 ++++++++++++++++++++++++++ src/cl_api.c | 11 +++++++++++ src/cl_device_id.c | 12 ++++++++---- src/cl_device_id.h | 1 - src/cl_kernel.c | 2 ++ src/cl_kernel.h | 2 ++ 10 files changed, 83 insertions(+), 7 deletions(-) diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index a889da9..246a3f2 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -122,6 +122,7 @@ namespace gbe { Kernel *kernel = this->compileKernel(unit, name); kernel->setSamplerSet(pair.second->getSamplerSet()); kernel->setImageSet(pair.second->getImageSet()); + kernel->setCompileWorkGroupSize(pair.second->getCompileWorkGroupSize()); kernels.insert(std::make_pair(name, kernel)); } return true; @@ -772,6 +773,12 @@ namespace gbe { kernel->getSamplerData(samplers); } + static void kernelGetCompileWorkGroupSize(gbe_kernel gbeKernel, size_t wg_size[3]) { + if (gbeKernel == NULL) return; + const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel; + kernel->getCompileWorkGroupSize(wg_size); + } + static size_t kernelGetImageSize(gbe_kernel gbeKernel) { if (gbeKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel; @@ -826,6 +833,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_slm_size_cb *gbe_kernel_get_slm_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data = NULL; +GBE_EXPORT_SYMBOL gbe_kernel_get_compile_wg_size_cb *gbe_kernel_get_compile_wg_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data = NULL; GBE_EXPORT_SYMBOL gbe_set_image_base_index_cb *gbe_set_image_base_index = NULL; @@ -862,6 +870,7 @@ namespace gbe gbe_kernel_get_slm_size = gbe::kernelGetSLMSize; gbe_kernel_get_sampler_size = gbe::kernelGetSamplerSize; gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData; + gbe_kernel_get_compile_wg_size = gbe::kernelGetCompileWorkGroupSize; gbe_kernel_get_image_size = gbe::kernelGetImageSize; gbe_kernel_get_image_data = gbe::kernelGetImageData; gbe_get_image_base_index = gbe::getImageBaseIndex; diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index 2640b65..e574764 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -149,6 +149,10 @@ extern gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size; typedef void (gbe_kernel_get_sampler_data_cb)(gbe_kernel gbeKernel, uint32_t *samplers); extern gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data; +/*! Get the content of defined samplers */ +typedef void (gbe_kernel_get_compile_wg_size_cb)(gbe_kernel gbeKernel, size_t wg_sz[3]); +extern gbe_kernel_get_compile_wg_size_cb *gbe_kernel_get_compile_wg_size; + /*! Destroy and deallocate the given program */ typedef void (gbe_program_delete_cb)(gbe_program); extern gbe_program_delete_cb *gbe_program_delete; diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp index dd76210..1aa8696 100644 --- a/backend/src/backend/program.hpp +++ b/backend/src/backend/program.hpp @@ -132,6 +132,18 @@ namespace gbe { void setImageSet(ir::ImageSet * from) { imageSet = from; } + /*! Set compile work group size */ + void setCompileWorkGroupSize(const size_t wg_sz[3]) { + compile_wg_sz[0] = wg_sz[0]; + compile_wg_sz[1] = wg_sz[1]; + compile_wg_sz[2] = wg_sz[2]; + } + /*! Get compile work group size */ + void getCompileWorkGroupSize (size_t wg_sz[3]) const { + wg_sz[0] = compile_wg_sz[0]; + wg_sz[1] = compile_wg_sz[1]; + wg_sz[2] = compile_wg_sz[2]; + } /*! Get defined image size */ size_t getImageSize(void) const { return imageSet->getDataSize(); } /*! Get defined image value array */ @@ -181,6 +193,7 @@ namespace gbe { Context *ctx; //!< Save context after compiler to alloc constant buffer curbe ir::SamplerSet *samplerSet;//!< Copy from the corresponding function. ir::ImageSet *imageSet; //!< Copy from the corresponding function. + size_t compile_wg_sz[3]; //!< required work group size by kernel attribute. GBE_CLASS(Kernel); //!< Use custom allocators }; diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 84d2504..33a87f7 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -310,6 +310,10 @@ namespace ir { SamplerSet* getSamplerSet(void) const {return samplerSet; } /*! Get image set in this function */ ImageSet* getImageSet(void) const {return imageSet; } + /*! Set required work group size. */ + void setCompileWorkGroupSize(size_t x, size_t y, size_t z) { compile_wg_sz[0] = x; compile_wg_sz[1] = y; compile_wg_sz[2] = z; } + /*! Get required work group size. */ + const size_t *getCompileWorkGroupSize(void) const {return compile_wg_sz;} private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name @@ -326,8 +330,10 @@ namespace ir { uint32_t simdWidth; //!< 8 or 16 if forced, 0 otherwise bool useSLM; //!< Is SLM required? uint32_t slmSize; //!< local variable size inside kernel function - SamplerSet *samplerSet; //!< samplers used in this function. - ImageSet* imageSet; //!< Image set in this function's arguments.. + SamplerSet *samplerSet; //!< samplers used in this function. + ImageSet* imageSet; //!< Image set in this function's arguments.. + size_t compile_wg_sz[3]; //!< required work group size specified by + // __attribute__((reqd_work_group_size(X, Y, Z))). GBE_CLASS(Function); //!< Use custom allocator }; diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index d620d44..428b098 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1056,6 +1056,32 @@ namespace gbe { GBE_ASSERTM(F.hasStructRetAttr() == false, "Returned value for kernel functions is forbidden"); + + // Loop over the kernel metadatas to set the required work group size. + NamedMDNode *clKernelMetaDatas = TheModule->getNamedMetadata("opencl.kernels"); + size_t reqd_wg_sz[3] = {0, 0, 0}; + for(uint i = 0; i < clKernelMetaDatas->getNumOperands(); i++) + { + MDNode *node = clKernelMetaDatas->getOperand(i); + if (node->getOperand(0) != &F) continue; + while(node->getNumOperands() > 1) { + MDNode *attrNode = dyn_cast_or_null<MDNode>(node->getOperand(1)); + if (attrNode == NULL) break; + MDString *attrName = dyn_cast_or_null<MDString>(attrNode->getOperand(0)); + if (attrName && attrName->getString() == "reqd_work_group_size") { + GBE_ASSERT(attrNode->getNumOperands() == 4); + ConstantInt *x = dyn_cast<ConstantInt>(attrNode->getOperand(1)); + ConstantInt *y = dyn_cast<ConstantInt>(attrNode->getOperand(2)); + ConstantInt *z = dyn_cast<ConstantInt>(attrNode->getOperand(3)); + GBE_ASSERT(x && y && z); + reqd_wg_sz[0] = x->getZExtValue(); + reqd_wg_sz[1] = y->getZExtValue(); + reqd_wg_sz[2] = z->getZExtValue(); + } + break; + } + } + ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], reqd_wg_sz[1], reqd_wg_sz[2]); // Loop over the arguments and output registers for them if (!F.arg_empty()) { uint32_t argID = 0; diff --git a/src/cl_api.c b/src/cl_api.c index d15354b..59c47d3 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2413,6 +2413,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, goto error; } + /* XXX No event right now */ //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported"); //FATAL_IF(event_wait_list != NULL, "Events are not supported"); @@ -2428,6 +2429,16 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, for (i = 0; i < work_dim; ++i) fixed_global_off[i] = global_work_offset[i]; + if (kernel->compile_wg_sz[0] || kernel->compile_wg_sz[1] || kernel->compile_wg_sz[2]) { + if (fixed_local_sz[0] != kernel->compile_wg_sz[0] + || fixed_local_sz[1] != kernel->compile_wg_sz[1] + || fixed_local_sz[2] != kernel->compile_wg_sz[2]) + { + err = CL_INVALID_WORK_GROUP_SIZE; + goto error; + } + } + /* Do device specific checks are enqueue the kernel */ err = cl_command_queue_ND_range(command_queue, kernel, diff --git a/src/cl_device_id.c b/src/cl_device_id.c index acc91e9..1124d30 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -42,7 +42,6 @@ static struct _cl_device_id intel_ivb_gt2_device = { .max_work_group_size = 1024, .max_clock_frequency = 1000, .wg_sz = 1024, - .compile_wg_sz = {0}, #include "cl_gen7_device.h" }; @@ -54,7 +53,6 @@ static struct _cl_device_id intel_ivb_gt1_device = { .max_work_group_size = 512, .max_clock_frequency = 1000, .wg_sz = 512, - .compile_wg_sz = {0}, #include "cl_gen7_device.h" }; @@ -67,7 +65,6 @@ static struct _cl_device_id intel_hsw_device = { .max_work_group_size = 512, .max_clock_frequency = 1000, .wg_sz = 512, - .compile_wg_sz = {0}, #include "cl_gen75_device.h" }; @@ -290,7 +287,6 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, switch (param_name) { DECL_FIELD(WORK_GROUP_SIZE, wg_sz) - DECL_FIELD(COMPILE_WORK_GROUP_SIZE, compile_wg_sz) DECL_FIELD(PREFERRED_WORK_GROUP_SIZE_MULTIPLE, preferred_wg_sz_mul) case CL_KERNEL_LOCAL_MEM_SIZE: if (param_value_size < sizeof(cl_ulong)) @@ -299,6 +295,14 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, *param_value_size_ret = sizeof(cl_ulong); *(cl_ulong*)param_value = gbe_kernel_get_slm_size(kernel->opaque) + kernel->local_mem_sz; return CL_SUCCESS; + case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: + if (param_value_size < sizeof(kernel->compile_wg_sz)) + return CL_INVALID_VALUE; + if (param_value_size_ret != NULL) + *param_value_size_ret = sizeof(kernel->compile_wg_sz); + memcpy(param_value, kernel->compile_wg_sz, sizeof(kernel->compile_wg_sz)); + return CL_SUCCESS; + default: return CL_INVALID_VALUE; }; } diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 56ffd33..4ece26c 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -95,7 +95,6 @@ struct _cl_device_id { size_t built_in_kernels_sz; /* Kernel specific info that we're assigning statically */ size_t wg_sz; - size_t compile_wg_sz[3]; size_t preferred_wg_sz_mul; }; diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 4ba1c11..9a2a737 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -230,6 +230,7 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque) assert(k->sampler_sz <= GEN_MAX_SAMPLERS); if (k->sampler_sz > 0) gbe_kernel_get_sampler_data(k->opaque, k->samplers); + gbe_kernel_get_compile_wg_size(k->opaque, k->compile_wg_sz); /* Get image data & size */ k->image_sz = gbe_kernel_get_image_size(k->opaque); assert(k->sampler_sz <= GEN_MAX_SURFACES); @@ -263,6 +264,7 @@ cl_kernel_dup(cl_kernel from) to->curbe_sz = from->curbe_sz; to->sampler_sz = from->sampler_sz; to->image_sz = from->image_sz; + memcpy(to->compile_wg_sz, from->compile_wg_sz, sizeof(from->compile_wg_sz)); if (to->sampler_sz) memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t)); if (to->image_sz) { diff --git a/src/cl_kernel.h b/src/cl_kernel.h index acb7206..608ed8e 100644 --- a/src/cl_kernel.h +++ b/src/cl_kernel.h @@ -57,6 +57,8 @@ struct _cl_kernel { struct ImageInfo *images; /* images defined in kernel args */ size_t image_sz; /* image count in kernel args */ cl_ulong local_mem_sz; /* local memory size specified in kernel args. */ + size_t compile_wg_sz[3]; /* Required workgroup size by __attribute__((reqd_work_gro + up_size(X, Y, Z))) qualifier.*/ cl_argument *args; /* To track argument setting */ uint32_t arg_n:31; /* Number of arguments */ uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */ -- 1.7.9.5 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet