[Beignet] [PATCH 7/8] BDW: Add device's sub slice field, for cl_get_kernel_max_wg_sz.

Yang Rong Sun, 28 Sep 2014 22:37:22 -0700

When SLM is enabled, cl_get_kernel_max_wg_sz should return a single sub slice's
max thread count * SIMD width, so the device's sub slice information is needed.


Signed-off-by: Yang Rong <rong.r.y...@intel.com>
---
 src/cl_device_id.c | 13 +++++++++++--
 src/cl_device_id.h |  1 +
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 9e63e81..a1e3e82 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -40,6 +40,7 @@ static struct _cl_device_id intel_ivb_gt2_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 16,
   .max_thread_per_unit = 8,
+  .sub_slice_count = 2,
   .max_work_item_sizes = {1024, 1024, 1024},
   .max_work_group_size = 1024,
   .max_clock_frequency = 1000,
@@ -50,6 +51,7 @@ static struct _cl_device_id intel_ivb_gt1_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 6,
   .max_thread_per_unit = 6,
+  .sub_slice_count = 1,
   .max_work_item_sizes = {512, 512, 512},
   .max_work_group_size = 512,
   .max_clock_frequency = 1000,
@@ -60,6 +62,7 @@ static struct _cl_device_id intel_baytrail_t_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 4,
   .max_thread_per_unit = 8,
+  .sub_slice_count = 1,
   .max_work_item_sizes = {512, 512, 512},
   .max_work_group_size = 512,
   .max_clock_frequency = 1000,
@@ -71,6 +74,7 @@ static struct _cl_device_id intel_hsw_gt1_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 10,
   .max_thread_per_unit = 7,
+  .sub_slice_count = 1,
   .max_work_item_sizes = {1024, 1024, 1024},
   .max_work_group_size = 1024,
   .max_clock_frequency = 1000,
@@ -81,6 +85,7 @@ static struct _cl_device_id intel_hsw_gt2_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 20,
   .max_thread_per_unit = 7,
+  .sub_slice_count = 2,
   .max_work_item_sizes = {1024, 1024, 1024},
   .max_work_group_size = 1024,
   .max_clock_frequency = 1000,
@@ -91,6 +96,7 @@ static struct _cl_device_id intel_hsw_gt3_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 40,
   .max_thread_per_unit = 7,
+  .sub_slice_count = 4,
   .max_work_item_sizes = {1024, 1024, 1024},
   .max_work_group_size = 1024,
   .max_clock_frequency = 1000,
@@ -102,6 +108,7 @@ static struct _cl_device_id intel_brw_gt1_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 12,
   .max_thread_per_unit = 7,
+  .sub_slice_count = 2,
   .max_work_item_sizes = {1024, 1024, 1024},
   .max_work_group_size = 1024,
   .max_clock_frequency = 1000,
@@ -112,6 +119,7 @@ static struct _cl_device_id intel_brw_gt2_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 24,
   .max_thread_per_unit = 7,
+  .sub_slice_count = 3,
   .max_work_item_sizes = {1024, 1024, 1024},
   .max_work_group_size = 1024,
   .max_clock_frequency = 1000,
@@ -122,6 +130,7 @@ static struct _cl_device_id intel_brw_gt3_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 48,
   .max_thread_per_unit = 7,
+  .sub_slice_count = 6,
   .max_work_item_sizes = {1024, 1024, 1024},
   .max_work_group_size = 1024,
   .max_clock_frequency = 1000,
@@ -634,8 +643,8 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
       work_group_size = kernel->program->ctx->device->max_compute_unit *
                         kernel->program->ctx->device->max_thread_per_unit * simd_width;
   } else
-    work_group_size = kernel->program->ctx->device->max_work_group_size /
-                      (16 / simd_width);
+    work_group_size = kernel->program->ctx->device->max_compute_unit * simd_width *
+                 kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count;
   return work_group_size;
 }
 
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 31bce47..afc32e2 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -27,6 +27,7 @@ struct _cl_device_id {
   cl_uint  vendor_id;
   cl_uint  max_compute_unit;               // maximum EU number
   cl_uint  max_thread_per_unit;            // maximum EU threads per EU.
+  cl_uint  sub_slice_count;                // Device's sub slice count
   cl_uint  max_work_item_dimensions;       // should be 3.
   size_t   max_work_item_sizes[3];         // equal to maximum work group size.
   size_t   max_work_group_size;            // maximum work group size under simd16 mode.
-- 
1.8.3.2

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

[Beignet] [PATCH 7/8] BDW: Add device's sub slice field, for cl_get_kernel_max_wg_sz.

Reply via email to