To be precise, I'll separate the patch into two: one for BXT and another one for CHV.
-----Original Message----- From: Pan, Xiuli Sent: Monday, September 12, 2016 10:57 AM To: Guo, Yejun; beignet@lists.freedesktop.org Subject: RE: [Beignet] [PATCH] add another broxton pciid 0x5A85 I think the pciid patch can go first and then the stack size one, because the stack size patch will affect not only BXT but also CHV. -----Original Message----- From: Guo, Yejun Sent: Monday, September 12, 2016 10:39 AM To: Pan, Xiuli <xiuli....@intel.com>; beignet@lists.freedesktop.org Subject: RE: [Beignet] [PATCH] add another broxton pciid 0x5A85 Thanks. The stack size bug needs to be fixed to get a 100% pass rate of utest for this pciid; that's the reason I merged them into one patch. -----Original Message----- From: Pan, Xiuli Sent: Monday, September 12, 2016 10:36 AM To: Guo, Yejun; beignet@lists.freedesktop.org Cc: Guo, Yejun Subject: RE: [Beignet] [PATCH] add another broxton pciid 0x5A85 I think this patch can be split into two patches: one to add this pciid, and the other for the stack size bug. The rest LGTM.
-----Original Message----- From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Guo Yejun Sent: Saturday, September 10, 2016 8:49 AM To: beignet@lists.freedesktop.org Cc: Guo, Yejun <yejun....@intel.com> Subject: [Beignet] [PATCH] add another broxton pciid 0x5A85 Signed-off-by: Guo Yejun <yejun....@intel.com> --- src/cl_command_queue_gen7.c | 11 ++++++----- src/cl_device_data.h | 4 +++- src/cl_device_id.c | 23 ++++++++++++++++++++++- src/cl_driver.h | 4 ++++ src/cl_driver_defs.c | 1 + src/intel/intel_driver.c | 10 ++++++++++ 6 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 6a9cf1f..b6a5920 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -272,12 +272,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker) assert(offset >= 0); stack_sz *= interp_kernel_get_simd_width(ker->opaque); stack_sz *= device->max_compute_unit * ctx->device->max_thread_per_unit; - /* Because HSW calc stack offset per thread is relative with half slice, when - thread schedule in half slice is not balance, would out of bound. Because - the max half slice is 4 in GT4, multiply stack size with 4 for safe. + + /* for some hardware, part of EUs are disabled with EU id reserved, + * it makes the active EU id larger than count of EUs within a subslice, + * need to enlarge stack size for such case to avoid out of range. 
*/ - if(cl_driver_get_ver(ctx->drv) == 75) - stack_sz *= 4; + cl_driver_enlarge_stack_size(ctx->drv, &stack_sz); + cl_gpgpu_set_stack(gpgpu, offset, stack_sz, BTI_PRIVATE); } diff --git a/src/cl_device_data.h b/src/cl_device_data.h index f680219..30366ea 100644 --- a/src/cl_device_data.h +++ b/src/cl_device_data.h @@ -298,9 +298,11 @@ /* BXT */ #define PCI_CHIP_BROXTON_P 0x5A84 /* Intel(R) BXT-P for mobile desktop */ +#define PCI_CHIP_BROXTON_1 0x5A85 #define IS_BROXTON(devid) \ - (devid == PCI_CHIP_BROXTON_P) + (devid == PCI_CHIP_BROXTON_P || \ + devid == PCI_CHIP_BROXTON_1) #define PCI_CHIP_KABYLAKE_ULT_GT1 0x5906 #define PCI_CHIP_KABYLAKE_ULT_GT2 0x5916 diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 34c182c..ce340c1 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -195,6 +195,16 @@ static struct _cl_device_id intel_bxt_device = { #include "cl_gen9_device.h" }; +static struct _cl_device_id intel_bxt1_device = { + .max_compute_unit = 12, + .max_thread_per_unit = 6, + .sub_slice_count = 2, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, + .max_clock_frequency = 1000, +#include "cl_gen9_device.h" +}; + static struct _cl_device_id intel_kbl_gt1_device = { .max_compute_unit = 12, .max_thread_per_unit = 7, @@ -615,6 +625,16 @@ bxt_break: cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; + case PCI_CHIP_BROXTON_1: + DECL_INFO_STRING(bxt1_break, intel_bxt1_device, name, "Intel(R) +HD Graphics Broxton 1"); +bxt1_break: + intel_bxt1_device.device_id = device_id; + intel_bxt1_device.platform = cl_get_platform_default(); + ret = &intel_bxt1_device; + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + case PCI_CHIP_KABYLAKE_ULT_GT1: DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULT GT1"); case PCI_CHIP_KABYLAKE_DT_GT1: @@ -931,6 +951,7 @@ LOCAL cl_bool is_gen_device(cl_device_id device) { device == 
&intel_skl_gt3_device || device == &intel_skl_gt4_device || device == &intel_bxt_device || + device == &intel_bxt1_device || device == &intel_kbl_gt1_device || device == &intel_kbl_gt15_device || device == &intel_kbl_gt2_device || @@ -1074,7 +1095,7 @@ cl_device_get_version(cl_device_id device, cl_int *ver) *ver = 8; } else if (device == &intel_skl_gt1_device || device == &intel_skl_gt2_device || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device - || device == &intel_bxt_device || device == &intel_kbl_gt1_device + || device == &intel_bxt_device || device == &intel_bxt1_device + || device == &intel_kbl_gt1_device || device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device || device == &intel_kbl_gt4_device || device == &intel_kbl_gt15_device) { *ver = 9; diff --git a/src/cl_driver.h b/src/cl_driver.h index 16730db..584be9d 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -51,6 +51,10 @@ extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr; typedef uint32_t (cl_driver_get_ver_cb)(cl_driver); extern cl_driver_get_ver_cb *cl_driver_get_ver; +/* enlarge stack size from the driver */ typedef void +(cl_driver_enlarge_stack_size_cb)(cl_driver, int32_t*); extern +cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size; + typedef enum cl_self_test_res{ SELF_TEST_PASS = 0, SELF_TEST_SLM_FAIL = 1, diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index 31176a4..ea4e90a 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL; LOCAL cl_driver_delete_cb *cl_driver_delete = NULL; LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL; LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL; +LOCAL cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size = +NULL; LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL; LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL; LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = 
NULL; diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index e561725..0766ca3 100644 --- a/src/intel/intel_driver.c +++ b/src/intel/intel_driver.c @@ -464,6 +464,15 @@ intel_driver_get_ver(struct intel_driver *drv) } static void +intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t +*stack_size) { + if (drv->gen_ver == 75) + *stack_size = *stack_size * 4; + else if (drv->device_id == PCI_CHIP_BROXTON_1 || IS_CHERRYVIEW(drv->device_id)) + *stack_size = *stack_size * 2; +} + +static void intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag) { drv->atomic_test_result = atomic_flag; @@ -921,6 +930,7 @@ intel_setup_callbacks(void) cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new; cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete; cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver; + cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *) + intel_driver_enlarge_stack_size; cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag; cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr; cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id; -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet