Re: [Beignet] [PATCH] use different pointer alignment for different implementation

2016-08-01 Thread Pan, Xiuli
LGTM!

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Guo 
Yejun
Sent: Tuesday, August 2, 2016 4:16 AM
To: beignet@lists.freedesktop.org
Cc: Guo, Yejun 
Subject: [Beignet] [PATCH] use different pointer alignment for different 
implementation

beignet only requires 64-byte alignment while other implementations might 
require 4096-byte alignment.

Also change the function cl_check_beignet for a better output message.

Signed-off-by: Guo Yejun 
---
 utests/compiler_time_stamp.cpp|  4 +++-
 utests/runtime_use_host_ptr_image.cpp |  8 ++--
 utests/utest_helper.cpp   | 10 ++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/utests/compiler_time_stamp.cpp b/utests/compiler_time_stamp.cpp 
index e376522..43165c1 100644
--- a/utests/compiler_time_stamp.cpp
+++ b/utests/compiler_time_stamp.cpp
@@ -16,8 +16,10 @@ static void cpu(int global_id, int *src, int *dst) {
 
 void compiler_time_stamp(void)
 {
-  if (!cl_check_beignet())
+  if (!cl_check_beignet()) {
+printf("Not beignet device , Skip!");
 return;
+  }
 
   const size_t n = 16;
   int cpu_dst[16], cpu_src[16];
diff --git a/utests/runtime_use_host_ptr_image.cpp 
b/utests/runtime_use_host_ptr_image.cpp
index 2de9194..4a30e89 100644
--- a/utests/runtime_use_host_ptr_image.cpp
+++ b/utests/runtime_use_host_ptr_image.cpp
@@ -18,8 +18,12 @@ static void runtime_use_host_ptr_image(void)
   desc.image_width = w;
   desc.image_height = h;
 
+  size_t alignment = 4096;  //page size
+  if (cl_check_beignet())
+alignment = 64; //cacheline size, beignet has loose limitation to 
enable userptr
+
   //src image
-  int ret = posix_memalign(&buf_data[0], 64, sizeof(uint32_t) * w * h);
+  int ret = posix_memalign(&buf_data[0], alignment, sizeof(uint32_t) * 
+ w * h);
   OCL_ASSERT(ret == 0);
   for (size_t i = 0; i < w*h; ++i)
 ((uint32_t*)buf_data[0])[i] = i;
@@ -27,7 +31,7 @@ static void runtime_use_host_ptr_image(void)
   OCL_CREATE_IMAGE(buf[0], CL_MEM_USE_HOST_PTR, &format, &desc, buf_data[0]);
 
   //dst image
-  ret = posix_memalign(&buf_data[1], 64, sizeof(uint32_t) * w * h);
+  ret = posix_memalign(&buf_data[1], alignment, sizeof(uint32_t) * w * 
+ h);
   OCL_ASSERT(ret == 0);
   for (size_t i = 0; i < w*h; ++i)
 ((uint32_t*)buf_data[1])[i] = 0;
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp index 
da4cfbf..3388d9f 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -860,7 +860,6 @@ int cl_check_beignet(void)
   size_t ret_sz;
   OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_VERSION, 0, 0, 
&param_value_size);
   if(param_value_size == 0) {
-printf("Not beignet device , Skip!");
 return 0;
   }
   char* device_version_str = (char* )malloc(param_value_size * sizeof(char) ); 
@@ -869,7 +868,6 @@ int cl_check_beignet(void)
 
   if(!strstr(device_version_str, "beignet")) {
 free(device_version_str);
-printf("Not beignet device , Skip!");
 return 0;
   }
   free(device_version_str);
@@ -906,8 +904,10 @@ int cl_check_ocl20(void)
 if(cl_check_beignet()) {
   printf("Beignet extension test!");
   return 1;
+} else {
+  printf("Not beignet device , Skip!");
+  return 0;
 }
-return 0;
   }
   char* device_version_str = (char* )malloc(param_value_size * sizeof(char) );
   OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, 
param_value_size, (void*)device_version_str, &ret_sz); @@ -919,8 +919,10 @@ int 
cl_check_ocl20(void)
 if(cl_check_beignet()) {
   printf("Beignet extension test!");
   return 1;
+} else {
+  printf("Not beignet device , Skip!");
+  return 0;
 }
-return 0;
   }
   free(device_version_str);
   return 1;
--
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [Patch V3] intel: Export pooled EU and min no. of eus in a pool.

2016-08-01 Thread Yang, Rong R
I sent a new version, could you check this and give comments/ACK?

Thanks,
Yang Rong

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Yang Rong
> Sent: Tuesday, August 2, 2016 15:51
> To: beignet@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Cc: Yang, Rong R 
> Subject: [Beignet] [Patch V3] intel: Export pooled EU and min no. of eus in a
> pool.
> 
> Update kernel interface with new I915_GETPARAM ioctl entries for pooled
> EU and min no. of EUs in a pool. Add a wrapping function for each parameter.
> Userspace drivers need these values when deciding the thread count. The
> kernel enables pooled EU by default for BXT, and for fused-down 2x6 parts it
> is advised to turn it off.
> 
> But there is another HW issue in these parts (fused-down 2x6 parts) before
> C0 that requires pooled EU to be enabled as a workaround. In this case the
> pool configuration changes depending upon which subslice is disabled, and
> the no. of EUs in a pool is different, so userspace needs to know the min
> no. of EUs in a pool.
> 
> V2: use the return value as the query result.
> ret < 0 on error, ret = 0 when not supported, and ret > 0 indicates
> the query result. (Chris)
> V3: Correct V2 errors.
> 
> Signed-off-by: Yang Rong 
> ---
>  include/drm/i915_drm.h   |  2 ++
>  intel/intel_bufmgr.h |  3 +++
>  intel/intel_bufmgr_gem.c | 30 ++
>  3 files changed, 35 insertions(+)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index
> c4ce6b2..eb611a7 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -357,6 +357,8 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_GPU_RESET  35
>  #define I915_PARAM_HAS_RESOURCE_STREAMER 36
>  #define I915_PARAM_HAS_EXEC_SOFTPIN   37
> +#define I915_PARAM_HAS_POOLED_EU 38
> +#define I915_PARAM_MIN_EU_IN_POOL 39
> 
>  typedef struct drm_i915_getparam {
>   __s32 param;
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index
> a1abbcd..96a4d9d 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -273,6 +273,9 @@ int drm_intel_get_reset_stats(drm_intel_context
> *ctx,  int drm_intel_get_subslice_total(int fd, unsigned int *subslice_total);
> int drm_intel_get_eu_total(int fd, unsigned int *eu_total);
> 
> +int drm_intel_get_pooled_eu(int fd);
> +int drm_intel_get_min_eu_in_pool(int fd);
> +
>  /** @{ Compatibility defines to keep old code building despite the symbol
> rename
>   * from dri_* to drm_intel_*
>   */
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index
> 0a4012b..b0a0eb9 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -3237,6 +3237,36 @@ drm_intel_get_eu_total(int fd, unsigned int
> *eu_total)
>   return 0;
>  }
> 
> +int
> +drm_intel_get_pooled_eu(int fd)
> +{
> + drm_i915_getparam_t gp;
> + int ret;
> +
> + memclear(gp);
> + gp.param = I915_PARAM_HAS_POOLED_EU;
> + gp.value = &ret;
> + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
> + return -errno;
> +
> + return ret;
> +}
> +
> +int
> +drm_intel_get_min_eu_in_pool(int fd)
> +{
> + drm_i915_getparam_t gp;
> + int ret;
> +
> + memclear(gp);
> + gp.param = I915_PARAM_MIN_EU_IN_POOL;
> + gp.value = &ret;
> + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
> + return -errno;
> +
> + return ret;
> +}
> +
>  /**
>   * Annotate the given bo for use in aub dumping.
>   *
> --
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH 1/2] FindLLVM: allow LLVM/Clang 3.8 and reorder the llvm-config priority.

2016-08-01 Thread Song, Ruiling
LGTM

Thanks!
Ruiling

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Yang Rong
> Sent: Monday, August 1, 2016 2:48 PM
> To: beignet@lists.freedesktop.org
> Cc: Yang, Rong R 
> Subject: [Beignet] [PATCH 1/2] FindLLVM: allow LLVM/Clang 3.8 and reorder
> the llvm-config priority.
> 
> Beignet supports 3 released LLVM versions, currently 3.6, 3.7 and 3.8.
> Beignet may still support LLVM 3.4 and 3.5, but the full tests don't cover
> them.
> 
> Signed-off-by: Yang Rong 
> ---
>  CMake/FindLLVM.cmake | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/CMake/FindLLVM.cmake b/CMake/FindLLVM.cmake
> index 83c8934..6129909 100644
> --- a/CMake/FindLLVM.cmake
> +++ b/CMake/FindLLVM.cmake
> @@ -8,12 +8,12 @@
>  # LLVM_FOUND   - True if llvm found.
>  if (LLVM_INSTALL_DIR)
>find_program(LLVM_CONFIG_EXECUTABLE
> -   NAMES llvm-config-35 llvm-config-3.5 llvm-config-36 
> llvm-config-3.6
> llvm-config-37 llvm-config-3.7 llvm-config-33 llvm-config-3.3 llvm-config-34
> llvm-config-3.4 llvm-config
> +   NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 
> llvm-config-3.6
> llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 
> llvm-
> config-34 llvm-config-3.4
> DOC "llvm-config executable"
> PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
>  else (LLVM_INSTALL_DIR)
>find_program(LLVM_CONFIG_EXECUTABLE
> -   NAMES llvm-config-35 llvm-config-3.5 llvm-config-36 
> llvm-config-3.6
> llvm-config-37 llvm-config-3.7 llvm-config-33 llvm-config-3.3 llvm-config-34
> llvm-config-3.4 llvm-config
> +   NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 
> llvm-config-3.6
> llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 
> llvm-
> config-34 llvm-config-3.4
> DOC "llvm-config executable")
>  endif (LLVM_INSTALL_DIR)
> 
> --
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [Patch V3] intel: Export pooled EU and min no. of eus in a pool.

2016-08-01 Thread Yang Rong
Update kernel interface with new I915_GETPARAM ioctl entries for
pooled EU and min no. of EUs in a pool. Add a wrapping function
for each parameter. Userspace drivers need these values when deciding
the thread count. The kernel enables pooled EU by default for BXT,
and for fused-down 2x6 parts it is advised to turn it off.

But there is another HW issue in these parts (fused-down
2x6 parts) before C0 that requires pooled EU to be enabled as a
workaround. In this case the pool configuration changes depending upon
which subslice is disabled, and the no. of EUs in a pool is different,
so userspace needs to know the min no. of EUs in a pool.

V2: use the return value as the query result.
ret < 0 on error, ret = 0 when not supported, and ret > 0 indicates
the query result. (Chris)
V3: Correct V2 errors.

Signed-off-by: Yang Rong 
---
 include/drm/i915_drm.h   |  2 ++
 intel/intel_bufmgr.h |  3 +++
 intel/intel_bufmgr_gem.c | 30 ++
 3 files changed, 35 insertions(+)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c4ce6b2..eb611a7 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -357,6 +357,8 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_GPU_RESET 35
 #define I915_PARAM_HAS_RESOURCE_STREAMER 36
 #define I915_PARAM_HAS_EXEC_SOFTPIN 37
+#define I915_PARAM_HAS_POOLED_EU 38
+#define I915_PARAM_MIN_EU_IN_POOL 39
 
 typedef struct drm_i915_getparam {
__s32 param;
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index a1abbcd..96a4d9d 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -273,6 +273,9 @@ int drm_intel_get_reset_stats(drm_intel_context *ctx,
 int drm_intel_get_subslice_total(int fd, unsigned int *subslice_total);
 int drm_intel_get_eu_total(int fd, unsigned int *eu_total);
 
+int drm_intel_get_pooled_eu(int fd);
+int drm_intel_get_min_eu_in_pool(int fd);
+
 /** @{ Compatibility defines to keep old code building despite the symbol 
rename
  * from dri_* to drm_intel_*
  */
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 0a4012b..b0a0eb9 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -3237,6 +3237,36 @@ drm_intel_get_eu_total(int fd, unsigned int *eu_total)
return 0;
 }
 
+int
+drm_intel_get_pooled_eu(int fd)
+{
+   drm_i915_getparam_t gp;
+   int ret;
+
+   memclear(gp);
+   gp.param = I915_PARAM_HAS_POOLED_EU;
+   gp.value = &ret;
+   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
+   return -errno;
+
+   return ret;
+}
+
+int
+drm_intel_get_min_eu_in_pool(int fd)
+{
+   drm_i915_getparam_t gp;
+   int ret;
+
+   memclear(gp);
+   gp.param = I915_PARAM_MIN_EU_IN_POOL;
+   gp.value = &ret;
+   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
+   return -errno;
+
+   return ret;
+}
+
 /**
  * Annotate the given bo for use in aub dumping.
  *
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [Patch V2 3/3] KBL: add kabylake runtime support.

2016-08-01 Thread Yang Rong
Kabylake is almost the same as Skylake, so use the Skylake functions directly.

Signed-off-by: Yang Rong 
---
 src/cl_command_queue.c   |   2 +-
 src/cl_device_id.c   | 224 +--
 src/cl_gen10_device.h|  31 +++
 src/intel/intel_driver.c |   4 +-
 src/intel/intel_gpgpu.c  |   2 +-
 5 files changed, 213 insertions(+), 50 deletions(-)
 create mode 100644 src/cl_gen10_device.h

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index b66928f..07c5d89 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -222,7 +222,7 @@ cl_command_queue_ND_range(cl_command_queue queue,
   /* Check that the user did not forget any argument */
   TRY (cl_kernel_check_args, k);
 
-  if (ver == 7 || ver == 75 || ver == 8 || ver == 9)
+  if (ver == 7 || ver == 75 || ver == 8 || ver == 9 || ver == 10)
 TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim, global_wk_off, 
global_wk_sz, local_wk_sz);
   else
 FATAL ("Unknown Gen Device");
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 6ea..35a19a6 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -210,6 +210,61 @@ static struct _cl_device_id intel_bxt_device = {
 #include "cl_gen9_device.h"
 };
 
+static struct _cl_device_id intel_kbl_gt1_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 12,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+static struct _cl_device_id intel_kbl_gt15_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 18,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 3,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+static struct _cl_device_id intel_kbl_gt2_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 24,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 3,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+static struct _cl_device_id intel_kbl_gt3_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 48,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 6,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+static struct _cl_device_id intel_kbl_gt4_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 72,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 9,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
 LOCAL cl_device_id
 cl_get_gt_device(void)
 {
@@ -580,6 +635,98 @@ bxt_break:
   cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
   break;
 
+case PCI_CHIP_KABYLAKE_ULT_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake ULT GT1");
+case PCI_CHIP_KABYLAKE_DT_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake Desktop GT1");
+case PCI_CHIP_KABYLAKE_HALO_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake Halo GT1");
+case PCI_CHIP_KABYLAKE_ULX_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake ULX GT1");
+case PCI_CHIP_KABYLAKE_SRV_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake Server GT1");
+kbl_gt1_break:
+  intel_kbl_gt1_device.device_id = device_id;
+  intel_kbl_gt1_device.platform = cl_get_platform_default();
+  ret = &intel_kbl_gt1_device;
+#ifdef ENABLE_FP64
+  cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+  cl_intel_platform_get_default_extension(ret);
+  cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+  break;
+
+case PCI_CHIP_KABYLAKE_ULT_GT15:
+  DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) 
HD Graphics Kabylake ULT GT1.5");
+case PCI_CHIP_KABYLAKE_DT_GT15:
+  DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) 
HD Graphics Kabylake Desktop GT1.5");
+case PCI_CHIP_KABYLAKE_HALO_GT15:
+  DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) 
HD Graphics Kabylake Halo GT1.5");
+case PCI_CHIP_KABYLAKE_ULX_GT15:
+  DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) 
HD Graphics Kabylake ULX GT1.5");
+kbl_gt15_break:
+  intel_kbl_gt15_device.device_id = device_id;
+  intel_kbl_gt15_device.platform = cl_get_platform_default();
+  ret = &intel_kbl_gt15_device;
+#ifdef ENABLE_FP64
+  cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+  cl_intel_platform_get_default_extension(ret);
+  cl_intel_p

[Beignet] [Patch V2 1/3] KBL: add kabylake pciids.

2016-08-01 Thread Yang Rong
Signed-off-by: Yang Rong 
---
 src/cl_device_data.h | 55 
 1 file changed, 55 insertions(+)

diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index f789feb..de96299 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -304,5 +304,60 @@
 
 #define IS_GEN9(devid)  (IS_SKYLAKE(devid) || IS_BROXTON(devid))
 
+#define PCI_CHIP_KABYLAKE_ULT_GT1     0x5906
+#define PCI_CHIP_KABYLAKE_ULT_GT2     0x5916
+#define PCI_CHIP_KABYLAKE_ULT_GT3     0x5926
+#define PCI_CHIP_KABYLAKE_ULT_GT15    0x5913
+#define PCI_CHIP_KABYLAKE_ULT_GT2_1   0x5921
+#define PCI_CHIP_KABYLAKE_ULT_GT3_1   0x5923
+#define PCI_CHIP_KABYLAKE_ULT_GT3_2   0x5927
+#define PCI_CHIP_KABYLAKE_DT_GT1      0x5902
+#define PCI_CHIP_KABYLAKE_DT_GT2      0x5912
+#define PCI_CHIP_KABYLAKE_DT_GT15     0x5917
+#define PCI_CHIP_KABYLAKE_HALO_GT1    0x590B
+#define PCI_CHIP_KABYLAKE_HALO_GT2    0x591B
+#define PCI_CHIP_KABYLAKE_HALO_GT4    0x593B
+#define PCI_CHIP_KABYLAKE_HALO_GT15   0x5908
+#define PCI_CHIP_KABYLAKE_ULX_GT1     0x590E
+#define PCI_CHIP_KABYLAKE_ULX_GT2     0x591E
+#define PCI_CHIP_KABYLAKE_ULX_GT15    0x5915
+#define PCI_CHIP_KABYLAKE_SRV_GT1     0x590A
+#define PCI_CHIP_KABYLAKE_SRV_GT2     0x591A
+#define PCI_CHIP_KABYLAKE_WKS_GT2     0x591D
+
+#define IS_KBL_GT1(devid) \
+  (devid == PCI_CHIP_KABYLAKE_ULT_GT1 ||  \
+   devid == PCI_CHIP_KABYLAKE_DT_GT1 ||   \
+   devid == PCI_CHIP_KABYLAKE_HALO_GT1 || \
+   devid == PCI_CHIP_KABYLAKE_ULX_GT1 ||  \
+   devid == PCI_CHIP_KABYLAKE_SRV_GT1)
+
+#define IS_KBL_GT15(devid) \
+  (devid == PCI_CHIP_KABYLAKE_ULT_GT15 ||  \
+   devid == PCI_CHIP_KABYLAKE_DT_GT15 ||   \
+   devid == PCI_CHIP_KABYLAKE_HALO_GT15 || \
+   devid == PCI_CHIP_KABYLAKE_ULX_GT15)
+
+#define IS_KBL_GT2(devid)  \
+  (devid == PCI_CHIP_KABYLAKE_ULT_GT2 ||   \
+   devid == PCI_CHIP_KABYLAKE_ULT_GT2_1 || \
+   devid == PCI_CHIP_KABYLAKE_DT_GT2 ||\
+   devid == PCI_CHIP_KABYLAKE_HALO_GT2 ||  \
+   devid == PCI_CHIP_KABYLAKE_ULX_GT2 ||   \
+   devid == PCI_CHIP_KABYLAKE_SRV_GT2 ||   \
+   devid == PCI_CHIP_KABYLAKE_WKS_GT2)
+
+#define IS_KBL_GT3(devid)  \
+  (devid == PCI_CHIP_KABYLAKE_ULT_GT3 ||   \
+   devid == PCI_CHIP_KABYLAKE_ULT_GT3_1 || \
+   devid == PCI_CHIP_KABYLAKE_ULT_GT3_2)
+
+#define IS_KBL_GT4(devid)   \
+  (devid == PCI_CHIP_KABYLAKE_HALO_GT4)
+
+#define IS_KABYLAKE(devid) (IS_KBL_GT1(devid) || IS_KBL_GT15(devid) || 
IS_KBL_GT2(devid) || IS_KBL_GT3(devid) || IS_KBL_GT4(devid))
+
+#define IS_GEN10(devid) IS_KABYLAKE(devid)
+
 #endif /* __CL_DEVICE_DATA_H__ */
 
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [Patch V2 2/3] KBL: add kabylake backend support.

2016-08-01 Thread Yang Rong
Kabylake is almost the same as Skylake, so derive it from GEN9.

Signed-off-by: Yang Rong 
---
 backend/src/CMakeLists.txt |  4 +++
 backend/src/backend/gen10_context.cpp  | 31 +++
 backend/src/backend/gen10_context.hpp  | 49 ++
 backend/src/backend/gen10_encoder.cpp  | 33 
 backend/src/backend/gen10_encoder.hpp  | 40 
 backend/src/backend/gen_insn_selection.cpp | 10 ++
 backend/src/backend/gen_insn_selection.hpp |  7 +
 backend/src/backend/gen_program.cpp| 15 +++--
 8 files changed, 186 insertions(+), 3 deletions(-)
 create mode 100644 backend/src/backend/gen10_context.cpp
 create mode 100644 backend/src/backend/gen10_context.hpp
 create mode 100644 backend/src/backend/gen10_encoder.cpp
 create mode 100644 backend/src/backend/gen10_encoder.hpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 41eb5ec..f368ae1 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -116,6 +116,8 @@ set (GBE_SRC
 backend/gen8_context.cpp
 backend/gen9_context.hpp
 backend/gen9_context.cpp
+backend/gen10_context.hpp
+backend/gen10_context.cpp
 backend/gen_program.cpp
 backend/gen_program.hpp
 backend/gen_program.h
@@ -133,6 +135,8 @@ set (GBE_SRC
 backend/gen8_encoder.cpp
 backend/gen9_encoder.hpp
 backend/gen9_encoder.cpp
+backend/gen10_encoder.hpp
+backend/gen10_encoder.cpp
 )
 
 set (GBE_LINK_LIBRARIES
diff --git a/backend/src/backend/gen10_context.cpp 
b/backend/src/backend/gen10_context.cpp
new file mode 100644
index 000..a8dc316
--- /dev/null
+++ b/backend/src/backend/gen10_context.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/**
+ * \file gen10_context.cpp
+ */
+
+#include "backend/gen10_context.hpp"
+#include "backend/gen_insn_selection.hpp"
+
+namespace gbe
+{
+  void Gen10Context::newSelection(void) {
+this->sel = GBE_NEW(Selection10, *this);
+  }
+}
diff --git a/backend/src/backend/gen10_context.hpp 
b/backend/src/backend/gen10_context.hpp
new file mode 100644
index 000..87dbffc
--- /dev/null
+++ b/backend/src/backend/gen10_context.hpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/**
+ * \file gen10_context.hpp
+ */
+#ifndef __GBE_gen10_CONTEXT_HPP__
+#define __GBE_gen10_CONTEXT_HPP__
+
+#include "backend/gen9_context.hpp"
+#include "backend/gen10_encoder.hpp"
+
+namespace gbe
+{
+  /* This class is used to implement the Gen10 (Kabylake)
+ specific logic for context. */
+  class Gen10Context : public Gen9Context
+  {
+  public:
+virtual ~Gen10Context(void) { };
+Gen10Context(const ir::Unit &unit, const std::string &name, uint32_t 
deviceID, bool relaxMath = false)
+: Gen9Context(unit, name, deviceID, relaxMath) {
+};
+
+  protected:
+virtual GenEncoder* generateEncoder(void) {
+  return GBE_NEW(Gen10Encoder, this->simdWidth, 10, deviceID);
+}
+
+  private:
+virtual void newSelection(void);
+  };
+}
+#endif /* __GBE_GEN10_CONTEXT_HPP__ */
diff --git a/backend/src/backend/gen10_encoder.cpp 
b/backend/src/backend/gen10_encoder.cpp
new file mode 100644
index 000..0550e9f
--- /dev/null
+++ b/backend/src/backend/gen10_encoder.cpp
@@ -0,0 +1,33 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Soft

[Beignet] [PATCH 1/3] KBL: add kabylake pciids.

2016-08-01 Thread Yang Rong
Signed-off-by: Yang Rong 
---
 src/cl_device_data.h | 55 
 1 file changed, 55 insertions(+)

diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index f789feb..de96299 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -304,5 +304,60 @@
 
 #define IS_GEN9(devid)  (IS_SKYLAKE(devid) || IS_BROXTON(devid))
 
+#define PCI_CHIP_KABYLAKE_ULT_GT1     0x5906
+#define PCI_CHIP_KABYLAKE_ULT_GT2     0x5916
+#define PCI_CHIP_KABYLAKE_ULT_GT3     0x5926
+#define PCI_CHIP_KABYLAKE_ULT_GT15    0x5913
+#define PCI_CHIP_KABYLAKE_ULT_GT2_1   0x5921
+#define PCI_CHIP_KABYLAKE_ULT_GT3_1   0x5923
+#define PCI_CHIP_KABYLAKE_ULT_GT3_2   0x5927
+#define PCI_CHIP_KABYLAKE_DT_GT1      0x5902
+#define PCI_CHIP_KABYLAKE_DT_GT2      0x5912
+#define PCI_CHIP_KABYLAKE_DT_GT15     0x5917
+#define PCI_CHIP_KABYLAKE_HALO_GT1    0x590B
+#define PCI_CHIP_KABYLAKE_HALO_GT2    0x591B
+#define PCI_CHIP_KABYLAKE_HALO_GT4    0x593B
+#define PCI_CHIP_KABYLAKE_HALO_GT15   0x5908
+#define PCI_CHIP_KABYLAKE_ULX_GT1     0x590E
+#define PCI_CHIP_KABYLAKE_ULX_GT2     0x591E
+#define PCI_CHIP_KABYLAKE_ULX_GT15    0x5915
+#define PCI_CHIP_KABYLAKE_SRV_GT1     0x590A
+#define PCI_CHIP_KABYLAKE_SRV_GT2     0x591A
+#define PCI_CHIP_KABYLAKE_WKS_GT2     0x591D
+
+#define IS_KBL_GT1(devid) \
+  (devid == PCI_CHIP_KABYLAKE_ULT_GT1 ||  \
+   devid == PCI_CHIP_KABYLAKE_DT_GT1 ||   \
+   devid == PCI_CHIP_KABYLAKE_HALO_GT1 || \
+   devid == PCI_CHIP_KABYLAKE_ULX_GT1 ||  \
+   devid == PCI_CHIP_KABYLAKE_SRV_GT1)
+
+#define IS_KBL_GT15(devid) \
+  (devid == PCI_CHIP_KABYLAKE_ULT_GT15 ||  \
+   devid == PCI_CHIP_KABYLAKE_DT_GT15 ||   \
+   devid == PCI_CHIP_KABYLAKE_HALO_GT15 || \
+   devid == PCI_CHIP_KABYLAKE_ULX_GT15)
+
+#define IS_KBL_GT2(devid)  \
+  (devid == PCI_CHIP_KABYLAKE_ULT_GT2 ||   \
+   devid == PCI_CHIP_KABYLAKE_ULT_GT2_1 || \
+   devid == PCI_CHIP_KABYLAKE_DT_GT2 ||\
+   devid == PCI_CHIP_KABYLAKE_HALO_GT2 ||  \
+   devid == PCI_CHIP_KABYLAKE_ULX_GT2 ||   \
+   devid == PCI_CHIP_KABYLAKE_SRV_GT2 ||   \
+   devid == PCI_CHIP_KABYLAKE_WKS_GT2)
+
+#define IS_KBL_GT3(devid)  \
+  (devid == PCI_CHIP_KABYLAKE_ULT_GT3 ||   \
+   devid == PCI_CHIP_KABYLAKE_ULT_GT3_1 || \
+   devid == PCI_CHIP_KABYLAKE_ULT_GT3_2)
+
+#define IS_KBL_GT4(devid)   \
+  (devid == PCI_CHIP_KABYLAKE_HALO_GT4)
+
+#define IS_KABYLAKE(devid) (IS_KBL_GT1(devid) || IS_KBL_GT15(devid) || 
IS_KBL_GT2(devid) || IS_KBL_GT3(devid) || IS_KBL_GT4(devid))
+
+#define IS_GEN10(devid) IS_KABYLAKE(devid)
+
 #endif /* __CL_DEVICE_DATA_H__ */
 
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 3/3] KBL: add kabylake runtime support.

2016-08-01 Thread Yang Rong
Kabylake is almost the same as Skylake, so use the Skylake functions directly.

Signed-off-by: Yang Rong 
---
 src/cl_command_queue.c   |   2 +-
 src/cl_device_id.c   | 224 +--
 src/cl_gen10_device.h|  31 +++
 src/intel/intel_driver.c |   4 +-
 src/intel/intel_gpgpu.c  |   2 +-
 5 files changed, 213 insertions(+), 50 deletions(-)
 create mode 100644 src/cl_gen10_device.h

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index b66928f..07c5d89 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -222,7 +222,7 @@ cl_command_queue_ND_range(cl_command_queue queue,
   /* Check that the user did not forget any argument */
   TRY (cl_kernel_check_args, k);
 
-  if (ver == 7 || ver == 75 || ver == 8 || ver == 9)
+  if (ver == 7 || ver == 75 || ver == 8 || ver == 9 || ver == 10)
 TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim, global_wk_off, 
global_wk_sz, local_wk_sz);
   else
 FATAL ("Unknown Gen Device");
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 6ea..35a19a6 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -210,6 +210,61 @@ static struct _cl_device_id intel_bxt_device = {
 #include "cl_gen9_device.h"
 };
 
+static struct _cl_device_id intel_kbl_gt1_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 12,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+static struct _cl_device_id intel_kbl_gt15_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 18,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 3,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+static struct _cl_device_id intel_kbl_gt2_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 24,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 3,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+static struct _cl_device_id intel_kbl_gt3_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 48,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 6,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+static struct _cl_device_id intel_kbl_gt4_device = {
+  INIT_ICD(dispatch)
+  .max_compute_unit = 72,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 9,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
 LOCAL cl_device_id
 cl_get_gt_device(void)
 {
@@ -580,6 +635,98 @@ bxt_break:
   cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
   break;
 
+case PCI_CHIP_KABYLAKE_ULT_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake ULT GT1");
+case PCI_CHIP_KABYLAKE_DT_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake Desktop GT1");
+case PCI_CHIP_KABYLAKE_HALO_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake Halo GT1");
+case PCI_CHIP_KABYLAKE_ULX_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake ULX GT1");
+case PCI_CHIP_KABYLAKE_SRV_GT1:
+  DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD 
Graphics Kabylake Server GT1");
+kbl_gt1_break:
+  intel_kbl_gt1_device.device_id = device_id;
+  intel_kbl_gt1_device.platform = cl_get_platform_default();
+  ret = &intel_kbl_gt1_device;
+#ifdef ENABLE_FP64
+  cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+  cl_intel_platform_get_default_extension(ret);
+  cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+  break;
+
+case PCI_CHIP_KABYLAKE_ULT_GT15:
+  DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) 
HD Graphics Kabylake ULT GT1.5");
+case PCI_CHIP_KABYLAKE_DT_GT15:
+  DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) 
HD Graphics Kabylake Desktop GT1.5");
+case PCI_CHIP_KABYLAKE_HALO_GT15:
+  DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) 
HD Graphics Kabylake Halo GT1.5");
+case PCI_CHIP_KABYLAKE_ULX_GT15:
+  DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) 
HD Graphics Kabylake ULX GT1.5");
+kbl_gt15_break:
+  intel_kbl_gt15_device.device_id = device_id;
+  intel_kbl_gt15_device.platform = cl_get_platform_default();
+  ret = &intel_kbl_gt15_device;
+#ifdef ENABLE_FP64
+  cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+  cl_intel_platform_get_default_extension(ret);
+  cl_intel_p

[Beignet] [PATCH 2/3] KBL: add kabylake backend support.

2016-08-01 Thread Yang Rong
Kabylake is almost the same as Skylake, so derive it from GEN9.

Signed-off-by: Yang Rong 
---
 backend/src/CMakeLists.txt |   4 +++
 backend/src/backend/.gen_program.cpp.swp   | Bin 0 -> 32768 bytes
 backend/src/backend/gen10_context.cpp  |  31 ++
 backend/src/backend/gen10_context.hpp  |  49 +
 backend/src/backend/gen10_encoder.cpp  |  33 +++
 backend/src/backend/gen10_encoder.hpp  |  40 +++
 backend/src/backend/gen_insn_selection.cpp |  10 ++
 backend/src/backend/gen_insn_selection.hpp |   7 +
 backend/src/backend/gen_program.cpp|  15 +++--
 9 files changed, 186 insertions(+), 3 deletions(-)
 create mode 100644 backend/src/backend/.gen_program.cpp.swp
 create mode 100644 backend/src/backend/gen10_context.cpp
 create mode 100644 backend/src/backend/gen10_context.hpp
 create mode 100644 backend/src/backend/gen10_encoder.cpp
 create mode 100644 backend/src/backend/gen10_encoder.hpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 41eb5ec..f368ae1 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -116,6 +116,8 @@ set (GBE_SRC
 backend/gen8_context.cpp
 backend/gen9_context.hpp
 backend/gen9_context.cpp
+backend/gen10_context.hpp
+backend/gen10_context.cpp
 backend/gen_program.cpp
 backend/gen_program.hpp
 backend/gen_program.h
@@ -133,6 +135,8 @@ set (GBE_SRC
 backend/gen8_encoder.cpp
 backend/gen9_encoder.hpp
 backend/gen9_encoder.cpp
+backend/gen10_encoder.hpp
+backend/gen10_encoder.cpp
 )
 
 set (GBE_LINK_LIBRARIES
diff --git a/backend/src/backend/.gen_program.cpp.swp 
b/backend/src/backend/.gen_program.cpp.swp
new file mode 100644
index 
..72261df7820de9983acbb50dbd7e30e7a0e17033
GIT binary patch
literal 32768
zcmeI44RmBlb>ABk;|1?J<|Dy@05w{eX=x<2G`sA~YG!2gIci4I3~J3+W@ejK_iL%y
z{?NYndNkT$)&^q-uMM`rU>h4dC&t8(upuVj@nVSWoH&cYIfMoCv0ghNX6+L@0b?H%
zaPqtL@!sn%wX_31CpprY|8#e~s#{gJZdKiT>(*NvJD*)rM@R2Y@%P=S)cF_x!UZ+(
zoYa52nM!T@lGy)m+aIZ1TkD_ip>032=2h0~UYJ=c6z})yrA*)#qwnipJ-_KU*8M_l
zw79w1pWuFueGcq%V0Rp7hUJlCcckt*GCn2+?i@Ivp8vdayDPh&Yo7!A9N6c;J_q(W
zu+M>g4(xMap9A|G`2T_f;pVea-$zfr&GzO2`}vHv&%a@RSM8^B4Y+*YZqt|S=M(nR
zJ%5({ebs&*wh!F%@3854`}yX!=g+k1W&8PuZO?7;{>MHC_BpW6fqf3_b6}qX`yANk
zz&;1|Ik3-xeGcq%V4nkBI8Z31QU^(YmN)=7|9AHPH~wZS^*Hz>cpF#+F909^u2kwV
z@MbU%o&i372WjB%gQMVX@V{?QrTzl^IruX80(cMj2jCaL8^9It3h*7^Prfsi`p@7o
z@P6=CPzMWO1biF#+;dW?e*xYB>fqJjAb1&gDR>5W^4Y1>qu^bj30?z6z-{15x200Q
z2HpW40yjY(d@pzr_+L07{s=q;9tXb;{uy{Tco=L050tF9u)6>G3%D6!@3m2SE)Cfaic`
zgC`lc4}u>8OJD%}Eg<9h?O=C*O8?zdUN!L4jgl6S3A^s=+VYJ4!xTJnmG+Ge2`X6r%dH;ZAVQBQBqG-^>fDV@LO`2kO&f|at)
zG%~qX34`&`Vr?_b7aR4^Z&cOElhhD3O=zi;Dt*eU^R2C>p~19}4-Qu><#NMU>1L%K
z-gP7&stN_ij`MLsNw(!??YI!O{Y*BDjK@r;G+!!ZmCok!r{*8HQkJjF
z=N1=D#=r~Ln&q&~U#f^fmhDfE>y0@j|_horbExNBaZI68~Z2EOW
zQK?2V4AhAeYM|I?R!geh2vuX#tE&u5&D5LKsyZ;>c#=L)cB4L@U7S-xQde2Brw5|^
z1H)=yYk)@S>P~xp-K$PaZ2F9NE-Zx20C^~R^>A2MO};xT<&sxcr>5ufGiR30LU<)V
zb$%+lI5oXE2a#)yN=d<^?xYYQOD+gY6B7+Y+$+=$8sbKhqOnO3S@*)3M#;-n9`w>u
z!JtC6gHYX2hCNOi8HRv7jj~90Vgv_|E8(Z%sg+!qA$e7%R}sw-$sCZiji4GD|5+4oRhET#{KjJKMdZ3A#gry4!*2TY|tt(RWMZh90im)cjn2
zc5ZQQbuK@(vNCnaOm)kf8-_V%*wXbv%^QrDsCfPin>y1X=47zEOpnRDimIKMh`!qP
z>d|JfEA@4CP>2t!Xq@BqV1m7gX90_nr`FeGIYr*;#cH$UsT0k51u~->lZmuiGxV+}
z6Us)Sl1UXSWy!RfS8Mp&$&{e8V9_*ql1312S1BjzEmjNl^^6QOJ$0W`da-V4;6Sx{
zt(KAh^6TsYHga!tV{>yLrk2PQHYA^WzSB%g6tDOhcoVu(mR|!dOi(KJz4d&dT5S{y
zvY+gbGb6bff`Z+zMmLfTO_F342zDSeKBo5{@A^*dx_1|Oj_o4P(O2#w({8f35X!az
zO7=U-(v2~RhaEy=hB>>k;uT7spK)JWIjrR+*WBD}_+cj3X!=EOY2Du;&ytqpX}!^O
z$u>2+n#t*Tx8jw%Ja<+3g{{1uCtY$)SHdE)o-s|o;8#MgTbow>LOnoZs0EpGVWk>m
zW~z-kQ@)epNHMcmso!s@=-H)IW=JhAo?ptJpIgag&n)Mcvdd>y)QOYoh#JZy)F?HpUiW8G(G2>LmQO}fw3PNLcBJ2vcspg3lh$5My_#O3
zlij5b9&CFQb-2ALG__n=*9*xA+XmlFw{~11glp`GR|?H=12t~itG}*LtJGD_TW?&e
z6x4||n=%@hlvlCqd(}}C&`DjeT&a4>iL^u&XNHDQ6b#uZO?6_=WQFn~if|(g?}RqHzzWgt*x!m^?Gx(;ll_+AjnLP3cX8>
zrV2K+O0-e9=Aji7y~;JOqzWhso7<{UhQ6mxEuT}1UJ!V`>{Xs$sH(HgHE2OYH>=yT
zsIj67vuk)L?qLDx)-^B9t;{S;Ew4^ZXBV@pmlS#Cv#ZN32%(=y>
z6?OL9%GooyxlzRkc&$0stR}dpU?j!{lODQcWyGcA2AUsV3aDTqbWs)AV4HzgZdQkF|E*D>
zj*N~wmf4X){lgBREVIo=<&<4yP!;Ois#;(zP`0FINi?{D+7#{v8#H2VTQ_3fr{}pw
zIi&rbnr}4gCA}jZR~|y(`O=qKgS~cJTFFyrOXI1d7PvO;G>jdbC`*ljt?{_DUlj{=
zBqU~$R8=6Jz9H313ucfRzp}m&s(=4Jb!2S(h|0>uAl+wO)(vJVq{ROJWsJltVk=_*
zm#xA0W7zs{22Jof@DlJl*!rIUKMf|p-QWpq{ZE5;f(&Hn{#`gek};BSM^
zFfKm_?gKJLm%;xlf1-_}0lPaL`L$-HTFRIGMlCPf9~KEuCnnI=&YD{v(~@DH=d#&|
zVUjFeWv|_-u>V1jHNtE@mi(M
zzR+2B&ShF{OzSK=*58v%q8WY2^lFHu;7Z>}X}gf&$zi{5&Y*iwb89Cs1{v-%QFdBw
z9d=bl?WR*36@R{0^$LNP&?pug#rxCNg!xYXmYGwTe`R4cJuM6lF>F
zDmzhh`Q+#*RW4Q=ftSV&INPjk=Cto4;u0=Hy4w28(opCW;=pNnNZXAwrv$C3+>$V5
z8S8m88cpNak#5KzJ)?YHbh^Aq9_4$KG+me09B9ws;aA*F54ypDX1NfAn3vVUb+44K
z70~2_@oi~_V!TS=Efdx!5(XXH+T35VeW#7S5r~vV>~hlI%3;Jctkj#{amCooHtL5$
zbdbPmmlRiG$>G3UuZtpw&79J_(f_sel?GB^G-$5zk(RGzd{^m#lLLe5aEr?p-)sDM
zM97TL;t{wld6D{r_hVwBzS#&Op)|Eet&Un_Ny@xSYgyI~7Lz*&3w}uJu$rVHhPU!4
zQH?a6EQWEpWH%_

[Beignet] [PATCH] use different pointer alignment for different implementation

2016-08-01 Thread Guo Yejun
beignet only requires 64-byte alignment, while other implementations
might require 4096-byte alignment.

Also move the "Not beignet device" message out of cl_check_beignet and into its callers, for better output messages.

Signed-off-by: Guo Yejun 
---
 utests/compiler_time_stamp.cpp|  4 +++-
 utests/runtime_use_host_ptr_image.cpp |  8 ++--
 utests/utest_helper.cpp   | 10 ++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/utests/compiler_time_stamp.cpp b/utests/compiler_time_stamp.cpp
index e376522..43165c1 100644
--- a/utests/compiler_time_stamp.cpp
+++ b/utests/compiler_time_stamp.cpp
@@ -16,8 +16,10 @@ static void cpu(int global_id, int *src, int *dst) {
 
 void compiler_time_stamp(void)
 {
-  if (!cl_check_beignet())
+  if (!cl_check_beignet()) {
+printf("Not beignet device , Skip!");
 return;
+  }
 
   const size_t n = 16;
   int cpu_dst[16], cpu_src[16];
diff --git a/utests/runtime_use_host_ptr_image.cpp 
b/utests/runtime_use_host_ptr_image.cpp
index 2de9194..4a30e89 100644
--- a/utests/runtime_use_host_ptr_image.cpp
+++ b/utests/runtime_use_host_ptr_image.cpp
@@ -18,8 +18,12 @@ static void runtime_use_host_ptr_image(void)
   desc.image_width = w;
   desc.image_height = h;
 
+  size_t alignment = 4096;  //page size
+  if (cl_check_beignet())
+alignment = 64; //cacheline size, beignet has loose limitation to 
enable userptr
+
   //src image
-  int ret = posix_memalign(&buf_data[0], 64, sizeof(uint32_t) * w * h);
+  int ret = posix_memalign(&buf_data[0], alignment, sizeof(uint32_t) * w * h);
   OCL_ASSERT(ret == 0);
   for (size_t i = 0; i < w*h; ++i)
 ((uint32_t*)buf_data[0])[i] = i;
@@ -27,7 +31,7 @@ static void runtime_use_host_ptr_image(void)
   OCL_CREATE_IMAGE(buf[0], CL_MEM_USE_HOST_PTR, &format, &desc, buf_data[0]);
 
   //dst image
-  ret = posix_memalign(&buf_data[1], 64, sizeof(uint32_t) * w * h);
+  ret = posix_memalign(&buf_data[1], alignment, sizeof(uint32_t) * w * h);
   OCL_ASSERT(ret == 0);
   for (size_t i = 0; i < w*h; ++i)
 ((uint32_t*)buf_data[1])[i] = 0;
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
index da4cfbf..3388d9f 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -860,7 +860,6 @@ int cl_check_beignet(void)
   size_t ret_sz;
   OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_VERSION, 0, 0, 
&param_value_size);
   if(param_value_size == 0) {
-printf("Not beignet device , Skip!");
 return 0;
   }
   char* device_version_str = (char* )malloc(param_value_size * sizeof(char) );
@@ -869,7 +868,6 @@ int cl_check_beignet(void)
 
   if(!strstr(device_version_str, "beignet")) {
 free(device_version_str);
-printf("Not beignet device , Skip!");
 return 0;
   }
   free(device_version_str);
@@ -906,8 +904,10 @@ int cl_check_ocl20(void)
 if(cl_check_beignet()) {
   printf("Beignet extension test!");
   return 1;
+} else {
+  printf("Not beignet device , Skip!");
+  return 0;
 }
-return 0;
   }
   char* device_version_str = (char* )malloc(param_value_size * sizeof(char) );
   OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, 
param_value_size, (void*)device_version_str, &ret_sz);
@@ -919,8 +919,10 @@ int cl_check_ocl20(void)
 if(cl_check_beignet()) {
   printf("Beignet extension test!");
   return 1;
+} else {
+  printf("Not beignet device , Skip!");
+  return 0;
 }
-return 0;
   }
   free(device_version_str);
   return 1;
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] remove "\n" in output message when test is failed

2016-08-01 Thread Guo Yejun
Otherwise, "[FAILED]" and the test name are not printed on the same line.

Signed-off-by: Guo Yejun 
---
 utests/builtin_lgamma.cpp   | 2 +-
 utests/builtin_lgamma_r.cpp | 2 +-
 utests/builtin_tgamma.cpp   | 2 +-
 utests/image_1D_buffer.cpp  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/utests/builtin_lgamma.cpp b/utests/builtin_lgamma.cpp
index 876699a..57945de 100644
--- a/utests/builtin_lgamma.cpp
+++ b/utests/builtin_lgamma.cpp
@@ -29,7 +29,7 @@ void builtin_lgamma(void) {
float cpu = lgamma(src[i]);
float gpu = dst[i];
if (fabsf(cpu - gpu) >= 1e-3) {
-   printf("%f %f %f\n", src[i], cpu, gpu);
+   printf("%f %f %f", src[i], cpu, gpu);
OCL_ASSERT(0);
}
}
diff --git a/utests/builtin_lgamma_r.cpp b/utests/builtin_lgamma_r.cpp
index b6e5d0e..0258767 100644
--- a/utests/builtin_lgamma_r.cpp
+++ b/utests/builtin_lgamma_r.cpp
@@ -34,7 +34,7 @@ void builtin_lgamma_r(void) {
int gpu_signp = ((int*)buf_data[2])[i];
float gpu = dst[i];
if (cpu_signp != gpu_signp || fabsf(cpu - gpu) >= 1e-3) 
{
-   printf("%f %f %f\n", src[i], cpu, gpu);
+   printf("%f %f %f", src[i], cpu, gpu);
OCL_ASSERT(0);
}
}
diff --git a/utests/builtin_tgamma.cpp b/utests/builtin_tgamma.cpp
index 204f49e..eb6bdd7 100644
--- a/utests/builtin_tgamma.cpp
+++ b/utests/builtin_tgamma.cpp
@@ -46,7 +46,7 @@ void builtin_tgamma(void)
   if (std::isinf(cpu)) {
 OCL_ASSERT(std::isinf(dst[i]));
   } else if (fabsf(cpu - dst[i]) >= cl_FLT_ULP(cpu) * ULPSIZE_FACTOR) {
-printf("%f %f %f\n", src[i], cpu, dst[i]);
+printf("%f %f %f", src[i], cpu, dst[i]);
 OCL_ASSERT(0);
   }
 }
diff --git a/utests/image_1D_buffer.cpp b/utests/image_1D_buffer.cpp
index f2eb7a3..66eb6e7 100644
--- a/utests/image_1D_buffer.cpp
+++ b/utests/image_1D_buffer.cpp
@@ -55,7 +55,7 @@ void image_1D_buffer(void)
   OCL_MAP_BUFFER(1);
   for (uint32_t i = 0; i < buffer_sz; i++) {
 if (((uint32_t*)buf_data[1])[i] != ((uint32_t*)buf_data[0])[i])
-  printf("i %d expected %x got %x \n", i, ((uint32_t*)buf_data[0])[i], 
((uint32_t*)buf_data[1])[i]);
+  printf("i %d expected %x got %x", i, ((uint32_t*)buf_data[0])[i], 
((uint32_t*)buf_data[1])[i]);
 OCL_ASSERT(((uint32_t*)buf_data[1])[i] == ((uint32_t*)buf_data[0])[i]);
   }
   OCL_UNMAP_BUFFER(0);
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet