This test case assumes the SIMD width is 16 and will fail if we set the SIMD
width to 8.
We need to make sure all utests pass with both SIMD8 and SIMD16; please
fix this minor issue.

The other patches in this patch set LGTM.

Thanks,
Zhigang Gong.

On Tue, May 12, 2015 at 04:28:03PM +0800, Guo Yejun wrote:
> Signed-off-by: Guo Yejun <yejun....@intel.com>
> ---
>  kernels/compiler_sub_group_shuffle.cl | 18 ++++++++++++++
>  utests/compiler_sub_group_shuffle.cpp | 45 +++++++++++++++++++++++++++++++++++
>  2 files changed, 63 insertions(+)
>  create mode 100644 kernels/compiler_sub_group_shuffle.cl
>  create mode 100644 utests/compiler_sub_group_shuffle.cpp
> 
> diff --git a/kernels/compiler_sub_group_shuffle.cl b/kernels/compiler_sub_group_shuffle.cl
> new file mode 100644
> index 0000000..a5ac943
> --- /dev/null
> +++ b/kernels/compiler_sub_group_shuffle.cl
> @@ -0,0 +1,18 @@
> +__kernel void compiler_sub_group_shuffle(global int *dst, int c)
> +{
> +  int i = get_global_id(0);
> +  if (i == 0)
> +    dst[0] = get_sub_group_size();
> +  dst++;
> +
> +  int from = i;
> +  int j = get_sub_group_size() - get_local_id(0) - 1;
> +  int o0 = get_sub_group_id();
> +  int o1 = intel_sub_group_shuffle(from, c);
> +  int o2 = intel_sub_group_shuffle(from, 5);
> +  int o3 = intel_sub_group_shuffle(from, j);
> +  dst[i*4] = o0;
> +  dst[i*4+1] = o1;
> +  dst[i*4+2] = o2;
> +  dst[i*4+3] = o3;
> +}
> diff --git a/utests/compiler_sub_group_shuffle.cpp b/utests/compiler_sub_group_shuffle.cpp
> new file mode 100644
> index 0000000..4ba8b99
> --- /dev/null
> +++ b/utests/compiler_sub_group_shuffle.cpp
> @@ -0,0 +1,45 @@
> +#include "utest_helper.hpp"
> +
> +void compiler_sub_group_shuffle(void)
> +{
> +  const size_t n = 32;
> +  const int32_t buf_size = 4 * n + 1;
> +
> +  // Setup kernel and buffers
> +  OCL_CREATE_KERNEL("compiler_sub_group_shuffle");
> +  OCL_CREATE_BUFFER(buf[0], 0, buf_size * sizeof(int), NULL);
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> +
> +  int c = 3;
> +  OCL_SET_ARG(1, sizeof(int), &c);
> +
> +  globals[0] = n;
> +  locals[0] = 16;
> +
> +  OCL_MAP_BUFFER(0);
> +  for (int32_t i = 0; i < buf_size; ++i)
> +    ((int*)buf_data[0])[i] = -1;
> +  OCL_UNMAP_BUFFER(0);
> +
> +  // Run the kernel on GPU
> +  OCL_NDRANGE(1);
> +
> +  // Compare
> +  OCL_MAP_BUFFER(0);
> +  int* dst = (int *)buf_data[0];
> +  int suggroupsize = dst[0];
> +  OCL_ASSERT(suggroupsize == 8 || suggroupsize == 16);
> +
> +  dst++;
> +  for (int32_t i = 0; i < (int32_t) n; ++i){
> +    int round = i / suggroupsize;
> +    int index = i % suggroupsize;
> +    OCL_ASSERT(index == dst[4*i]);
> +    OCL_ASSERT((round * suggroupsize + c) == dst[4*i+1]);
> +    OCL_ASSERT((round * suggroupsize + 5) == dst[4*i+2]);
> +    OCL_ASSERT((round * suggroupsize + (suggroupsize - index - 1)) == dst[4*i+3]);
> +  }
> +  OCL_UNMAP_BUFFER(0);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_shuffle);
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to