When SLM is enabled, the kernel's maximum work-group size should be one sub-slice's maximum thread count * SIMD width, so the device description needs the sub-slice count.
Signed-off-by: Yang Rong <rong.r.y...@intel.com> --- src/cl_device_id.c | 13 +++++++++++-- src/cl_device_id.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 9e63e81..a1e3e82 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -40,6 +40,7 @@ static struct _cl_device_id intel_ivb_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 16, .max_thread_per_unit = 8, + .sub_slice_count = 2, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -50,6 +51,7 @@ static struct _cl_device_id intel_ivb_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 6, .max_thread_per_unit = 6, + .sub_slice_count = 1, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, @@ -60,6 +62,7 @@ static struct _cl_device_id intel_baytrail_t_device = { INIT_ICD(dispatch) .max_compute_unit = 4, .max_thread_per_unit = 8, + .sub_slice_count = 1, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, @@ -71,6 +74,7 @@ static struct _cl_device_id intel_hsw_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 10, .max_thread_per_unit = 7, + .sub_slice_count = 1, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -81,6 +85,7 @@ static struct _cl_device_id intel_hsw_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 20, .max_thread_per_unit = 7, + .sub_slice_count = 2, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -91,6 +96,7 @@ static struct _cl_device_id intel_hsw_gt3_device = { INIT_ICD(dispatch) .max_compute_unit = 40, .max_thread_per_unit = 7, + .sub_slice_count = 4, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -102,6 +108,7 @@ static struct _cl_device_id intel_brw_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 12, 
.max_thread_per_unit = 7, + .sub_slice_count = 2, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -112,6 +119,7 @@ static struct _cl_device_id intel_brw_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 24, .max_thread_per_unit = 7, + .sub_slice_count = 3, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -122,6 +130,7 @@ static struct _cl_device_id intel_brw_gt3_device = { INIT_ICD(dispatch) .max_compute_unit = 48, .max_thread_per_unit = 7, + .sub_slice_count = 6, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -634,8 +643,8 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel) work_group_size = kernel->program->ctx->device->max_compute_unit * kernel->program->ctx->device->max_thread_per_unit * simd_width; } else - work_group_size = kernel->program->ctx->device->max_work_group_size / - (16 / simd_width); + work_group_size = kernel->program->ctx->device->max_compute_unit * simd_width * + kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count; return work_group_size; } diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 31bce47..afc32e2 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -27,6 +27,7 @@ struct _cl_device_id { cl_uint vendor_id; cl_uint max_compute_unit; // maximum EU number cl_uint max_thread_per_unit; // maximum EU threads per EU. + cl_uint sub_slice_count; // Device's sub slice count cl_uint max_work_item_dimensions; // should be 3. size_t max_work_item_sizes[3]; // equal to maximum work group size. size_t max_work_group_size; // maximum work group size under simd16 mode. -- 1.8.3.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet