From: Pan Xiuli <xiuli....@intel.com> Add ushort block read/write for buffer and image. Refine uint block read/write with suffix _ui.
Signed-off-by: Pan Xiuli <xiuli....@intel.com> --- kernels/compiler_subgroup_buffer_block_read.cl | 47 ++++++++++-- kernels/compiler_subgroup_buffer_block_write.cl | 44 +++++++++-- kernels/compiler_subgroup_image_block_read.cl | 49 +++++++++++-- kernels/compiler_subgroup_image_block_write.cl | 46 +++++++++--- utests/compiler_subgroup_buffer_block_read.cpp | 73 +++++++++++++++--- utests/compiler_subgroup_buffer_block_write.cpp | 74 ++++++++++++++++--- utests/compiler_subgroup_image_block_read.cpp | 98 +++++++++++++++++++------ utests/compiler_subgroup_image_block_write.cpp | 73 +++++++++++++++--- 8 files changed, 412 insertions(+), 92 deletions(-) diff --git a/kernels/compiler_subgroup_buffer_block_read.cl b/kernels/compiler_subgroup_buffer_block_read.cl index 9edaa2e..4cbf894 100644 --- a/kernels/compiler_subgroup_buffer_block_read.cl +++ b/kernels/compiler_subgroup_buffer_block_read.cl @@ -1,31 +1,62 @@ -__kernel void compiler_subgroup_buffer_block_read1(global uint *src, global uint *dst) +__kernel void compiler_subgroup_buffer_block_read_ui1(global uint *src, global uint *dst) { int id = get_global_id(0); global uint * p = src + get_sub_group_id() * get_max_sub_group_size(); - uint tmp = intel_sub_group_block_read(p); + uint tmp = intel_sub_group_block_read_ui(p); dst[id] = tmp; } -__kernel void compiler_subgroup_buffer_block_read2(global uint *src, global uint2 *dst) +__kernel void compiler_subgroup_buffer_block_read_ui2(global uint *src, global uint2 *dst) { int id = get_global_id(0); global uint * p = src + get_sub_group_id() * get_max_sub_group_size()*2; - uint2 tmp = intel_sub_group_block_read2(p); + uint2 tmp = intel_sub_group_block_read_ui2(p); dst[id] = tmp; } -__kernel void compiler_subgroup_buffer_block_read4(global uint *src, global uint4 *dst) +__kernel void compiler_subgroup_buffer_block_read_ui4(global uint *src, global uint4 *dst) { int id = get_global_id(0); global uint * p = src + get_sub_group_id() * get_max_sub_group_size()*4; - uint4 tmp = intel_sub_group_block_read4(p); + uint4 tmp = intel_sub_group_block_read_ui4(p); dst[id] = tmp; } -__kernel void compiler_subgroup_buffer_block_read8(global uint *src, global uint8 *dst) +__kernel void compiler_subgroup_buffer_block_read_ui8(global uint *src, global uint8 *dst) { int id = get_global_id(0); global uint * p = src + get_sub_group_id() * get_max_sub_group_size()*8; - uint8 tmp = intel_sub_group_block_read8(p); + uint8 tmp = intel_sub_group_block_read_ui8(p); dst[id] = tmp; } +#ifdef SHORT +__kernel void compiler_subgroup_buffer_block_read_us1(global ushort *src, global ushort *dst) +{ + int id = get_global_id(0); + global ushort * p = src + get_sub_group_id() * get_max_sub_group_size(); + ushort tmp = intel_sub_group_block_read_us(p); + dst[id] = tmp; +} +__kernel void compiler_subgroup_buffer_block_read_us2(global ushort *src, global ushort2 *dst) +{ + int id = get_global_id(0); + global ushort * p = src + get_sub_group_id() * get_max_sub_group_size()*2; + ushort2 tmp = intel_sub_group_block_read_us2(p); + dst[id] = tmp; +} +__kernel void compiler_subgroup_buffer_block_read_us4(global ushort *src, global ushort4 *dst) +{ + int id = get_global_id(0); + global ushort * p = src + get_sub_group_id() * get_max_sub_group_size()*4; + ushort4 tmp = intel_sub_group_block_read_us4(p); + dst[id] = tmp; +} + +__kernel void compiler_subgroup_buffer_block_read_us8(global ushort *src, global ushort8 *dst) +{ + int id = get_global_id(0); + global ushort * p = src + get_sub_group_id() * get_max_sub_group_size()*8; + ushort8 tmp = intel_sub_group_block_read_us8(p); + dst[id] = tmp; +} +#endif diff --git a/kernels/compiler_subgroup_buffer_block_write.cl b/kernels/compiler_subgroup_buffer_block_write.cl index f735855..f452dcc 100644 --- a/kernels/compiler_subgroup_buffer_block_write.cl +++ b/kernels/compiler_subgroup_buffer_block_write.cl @@ -1,27 +1,55 @@ -__kernel void compiler_subgroup_buffer_block_write1(global uint *src, global uint *dst) +__kernel void compiler_subgroup_buffer_block_write_ui1(global uint *src, global uint *dst) { int id = get_global_id(0); global uint * p = dst + get_sub_group_id() * get_max_sub_group_size(); - intel_sub_group_block_write(p,src[id]); + intel_sub_group_block_write_ui(p,src[id]); } -__kernel void compiler_subgroup_buffer_block_write2(global uint2 *src, global uint *dst) +__kernel void compiler_subgroup_buffer_block_write_ui2(global uint2 *src, global uint *dst) { int id = get_global_id(0); global uint * p = dst + get_sub_group_id() * get_max_sub_group_size()*2; - intel_sub_group_block_write2(p,src[id]); + intel_sub_group_block_write_ui2(p,src[id]); } -__kernel void compiler_subgroup_buffer_block_write4(global uint4 *src, global uint *dst) +__kernel void compiler_subgroup_buffer_block_write_ui4(global uint4 *src, global uint *dst) { int id = get_global_id(0); global uint * p = dst + get_sub_group_id() * get_max_sub_group_size()*4; - intel_sub_group_block_write4(p,src[id]); + intel_sub_group_block_write_ui4(p,src[id]); } -__kernel void compiler_subgroup_buffer_block_write8(global uint8 *src, global uint *dst) +__kernel void compiler_subgroup_buffer_block_write_ui8(global uint8 *src, global uint *dst) { int id = get_global_id(0); global uint * p = dst + get_sub_group_id() * get_max_sub_group_size()*8; - intel_sub_group_block_write8(p,src[id]); + intel_sub_group_block_write_ui8(p,src[id]); } +#ifdef SHORT +__kernel void compiler_subgroup_buffer_block_write_us1(global ushort *src, global ushort *dst) +{ + int id = get_global_id(0); + global ushort * p = dst + get_sub_group_id() * get_max_sub_group_size(); + intel_sub_group_block_write_us(p,src[id]); +} + +__kernel void compiler_subgroup_buffer_block_write_us2(global ushort2 *src, global ushort *dst) +{ + int id = get_global_id(0); + global ushort * p = dst + get_sub_group_id() * get_max_sub_group_size()*2; + intel_sub_group_block_write_us2(p,src[id]); +} + +__kernel void compiler_subgroup_buffer_block_write_us4(global ushort4 *src, global ushort *dst) +{ + int id = get_global_id(0); + global ushort * p = dst + get_sub_group_id() * get_max_sub_group_size()*4; + intel_sub_group_block_write_us4(p,src[id]); +} +__kernel void compiler_subgroup_buffer_block_write_us8(global ushort8 *src, global ushort *dst) +{ + int id = get_global_id(0); + global ushort * p = dst + get_sub_group_id() * get_max_sub_group_size()*8; + intel_sub_group_block_write_us8(p,src[id]); +} +#endif diff --git a/kernels/compiler_subgroup_image_block_read.cl b/kernels/compiler_subgroup_image_block_read.cl index d5df6db..fa079b7 100644 --- a/kernels/compiler_subgroup_image_block_read.cl +++ b/kernels/compiler_subgroup_image_block_read.cl @@ -1,31 +1,64 @@ -__kernel void compiler_subgroup_image_block_read1(image2d_t src, global uint *dst) +__kernel void compiler_subgroup_image_block_read_ui1(image2d_t src, global uint *dst) { int id = get_global_id(0); int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0); - uint tmp = intel_sub_group_block_read(src,coord); + uint tmp = intel_sub_group_block_read_ui(src,coord); dst[id] = tmp; } -__kernel void compiler_subgroup_image_block_read2(image2d_t src, global uint2 *dst) +__kernel void compiler_subgroup_image_block_read_ui2(image2d_t src, global uint2 *dst) { int id = get_global_id(0); int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0); - uint2 tmp = intel_sub_group_block_read2(src,coord); + uint2 tmp = intel_sub_group_block_read_ui2(src,coord); dst[id] = tmp; } -__kernel void compiler_subgroup_image_block_read4(image2d_t src, global uint4 *dst) +__kernel void compiler_subgroup_image_block_read_ui4(image2d_t src, global uint4 *dst) { int id = get_global_id(0); int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0); - uint4 tmp = intel_sub_group_block_read4(src,coord); + uint4 tmp = intel_sub_group_block_read_ui4(src,coord); dst[id] = tmp; } -__kernel void compiler_subgroup_image_block_read8(image2d_t src, global uint8 *dst) +__kernel void compiler_subgroup_image_block_read_ui8(image2d_t src, global uint8 *dst) { int id = get_global_id(0); int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0); - uint8 tmp = intel_sub_group_block_read8(src,coord); + uint8 tmp = intel_sub_group_block_read_ui8(src,coord); dst[id] = tmp; } +#ifdef SHORT +__kernel void compiler_subgroup_image_block_read_us1(image2d_t src, global ushort *dst) +{ + int id = get_global_id(0); + int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0); + ushort tmp = intel_sub_group_block_read_us(src,coord); + dst[id] = tmp; +} + +__kernel void compiler_subgroup_image_block_read_us2(image2d_t src, global ushort2 *dst) +{ + int id = get_global_id(0); + int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0); + ushort2 tmp = intel_sub_group_block_read_us2(src,coord); + dst[id] = tmp; +} + +__kernel void compiler_subgroup_image_block_read_us4(image2d_t src, global ushort4 *dst) +{ + int id = get_global_id(0); + int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0); + ushort4 tmp = intel_sub_group_block_read_us4(src,coord); + dst[id] = tmp; +} + +__kernel void compiler_subgroup_image_block_read_us8(image2d_t src, global ushort8 *dst) +{ + int id = get_global_id(0); + int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0); + ushort8 tmp = intel_sub_group_block_read_us8(src,coord); + dst[id] = tmp; +} +#endif diff --git a/kernels/compiler_subgroup_image_block_write.cl b/kernels/compiler_subgroup_image_block_write.cl index d9b3717..7d97c59 100644 --- a/kernels/compiler_subgroup_image_block_write.cl +++ b/kernels/compiler_subgroup_image_block_write.cl @@ -1,27 +1,55 @@ -__kernel void compiler_subgroup_image_block_write1(image2d_t dst, global uint *src) +__kernel void compiler_subgroup_image_block_write_ui1(image2d_t dst, global uint *src) { int id = get_global_id(0); int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0); - intel_sub_group_block_write(dst,coord, src[id]); + intel_sub_group_block_write_ui(dst,coord, src[id]); } - -__kernel void compiler_subgroup_image_block_write2(image2d_t dst, global uint2 *src) +__kernel void compiler_subgroup_image_block_write_ui2(image2d_t dst, global uint2 *src) { int id = get_global_id(0); int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0); - intel_sub_group_block_write2(dst,coord, src[id]); + intel_sub_group_block_write_ui2(dst,coord, src[id]); } -__kernel void compiler_subgroup_image_block_write4(image2d_t dst, global uint4 *src) +__kernel void compiler_subgroup_image_block_write_ui4(image2d_t dst, global uint4 *src) { int id = get_global_id(0); int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0); - intel_sub_group_block_write4(dst,coord, src[id]); + intel_sub_group_block_write_ui4(dst,coord, src[id]); } -__kernel void compiler_subgroup_image_block_write8(image2d_t dst, global uint8 *src) +__kernel void compiler_subgroup_image_block_write_ui8(image2d_t dst, global uint8 *src) { int id = get_global_id(0); int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0); - intel_sub_group_block_write8(dst,coord, src[id]); + intel_sub_group_block_write_ui8(dst,coord, src[id]); +} +#ifdef SHORT +__kernel void compiler_subgroup_image_block_write_us1(image2d_t dst, global ushort *src) +{ + int id = get_global_id(0); + int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0); + intel_sub_group_block_write_us(dst,coord, src[id]); +} + +__kernel void compiler_subgroup_image_block_write_us2(image2d_t dst, global ushort2 *src) +{ + int id = get_global_id(0); + int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0); + intel_sub_group_block_write_us2(dst,coord, src[id]); +} + +__kernel void compiler_subgroup_image_block_write_us4(image2d_t dst, global ushort4 *src) +{ + int id = get_global_id(0); + int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0); + intel_sub_group_block_write_us4(dst,coord, src[id]); +} + +__kernel void compiler_subgroup_image_block_write_us8(image2d_t dst, global ushort8 *src) +{ + int id = get_global_id(0); + int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0); + intel_sub_group_block_write_us8(dst,coord, src[id]); } +#endif diff --git a/utests/compiler_subgroup_buffer_block_read.cpp b/utests/compiler_subgroup_buffer_block_read.cpp index 9707f19..74bc899 100644 --- a/utests/compiler_subgroup_buffer_block_read.cpp +++ b/utests/compiler_subgroup_buffer_block_read.cpp @@ -64,6 +64,7 @@ static void generate_data(T* &input, input[(gid + lid)*VEC_SIZE + vsz] += ((rand() % 2 - 1) * base_val); /* add trailing random bits, tests GENERAL cases */ input[(gid + lid)*VEC_SIZE + vsz] += (rand() % 112); + //input[(gid + lid)*VEC_SIZE + vsz] = (gid + lid)*VEC_SIZE + vsz; #if DEBUG_STDOUT /* output generated input */ @@ -156,47 +157,95 @@ static void subgroup_generic(T* input, /* * subgroup buffer block read */ -void compiler_subgroup_buffer_block_read1(void) +void compiler_subgroup_buffer_block_read_ui1(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_read", - "compiler_subgroup_buffer_block_read1"); + "compiler_subgroup_buffer_block_read_ui1"); subgroup_generic(input, expected, 1); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read1); -void compiler_subgroup_buffer_block_read2(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_ui1); +void compiler_subgroup_buffer_block_read_ui2(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_read", - "compiler_subgroup_buffer_block_read2"); + "compiler_subgroup_buffer_block_read_ui2"); subgroup_generic(input, expected, 2); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read2); -void compiler_subgroup_buffer_block_read4(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_ui2); +void compiler_subgroup_buffer_block_read_ui4(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_read", - "compiler_subgroup_buffer_block_read4"); + "compiler_subgroup_buffer_block_read_ui4"); subgroup_generic(input, expected, 4); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read4); -void compiler_subgroup_buffer_block_read8(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_ui4); +void compiler_subgroup_buffer_block_read_ui8(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_read", - "compiler_subgroup_buffer_block_read8"); + "compiler_subgroup_buffer_block_read_ui8"); subgroup_generic(input, expected, 8); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read8); +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_ui8); +void compiler_subgroup_buffer_block_read_us1(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_read.cl", + "compiler_subgroup_buffer_block_read_us1", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 1); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_us1); +void compiler_subgroup_buffer_block_read_us2(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_read.cl", + "compiler_subgroup_buffer_block_read_us2", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 2); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_us2); +void compiler_subgroup_buffer_block_read_us4(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_read.cl", + "compiler_subgroup_buffer_block_read_us4", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 4); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_us4); +void compiler_subgroup_buffer_block_read_us8(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_read.cl", + "compiler_subgroup_buffer_block_read_us8", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 8); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_us8); diff --git a/utests/compiler_subgroup_buffer_block_write.cpp b/utests/compiler_subgroup_buffer_block_write.cpp index 6b257c5..3b538da 100644 --- a/utests/compiler_subgroup_buffer_block_write.cpp +++ b/utests/compiler_subgroup_buffer_block_write.cpp @@ -64,6 +64,7 @@ static void generate_data(T* &input, input[(gid + lid)*VEC_SIZE + vsz] += ((rand() % 2 - 1) * base_val); /* add trailing random bits, tests GENERAL cases */ input[(gid + lid)*VEC_SIZE + vsz] += (rand() % 112); + //input[(gid + lid)*VEC_SIZE + vsz] = (gid + lid)*VEC_SIZE + vsz; #if DEBUG_STDOUT /* output generated input */ @@ -156,47 +157,96 @@ static void subgroup_generic(T* input, /* * subgroup buffer block write */ -void compiler_subgroup_buffer_block_write1(void) +void compiler_subgroup_buffer_block_write_ui1(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_write", - "compiler_subgroup_buffer_block_write1"); + "compiler_subgroup_buffer_block_write_ui1"); subgroup_generic(input, expected, 1); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write1); -void compiler_subgroup_buffer_block_write2(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_ui1); +void compiler_subgroup_buffer_block_write_ui2(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_write", - "compiler_subgroup_buffer_block_write2"); + "compiler_subgroup_buffer_block_write_ui2"); subgroup_generic(input, expected, 2); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write2); -void compiler_subgroup_buffer_block_write4(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_ui2); +void compiler_subgroup_buffer_block_write_ui4(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_write", - "compiler_subgroup_buffer_block_write4"); + "compiler_subgroup_buffer_block_write_ui4"); subgroup_generic(input, expected, 4); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write4); -void compiler_subgroup_buffer_block_write8(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_ui4); +void compiler_subgroup_buffer_block_write_ui8(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_write", - "compiler_subgroup_buffer_block_write8"); + "compiler_subgroup_buffer_block_write_ui8"); subgroup_generic(input, expected, 8); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write8); +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_ui8); + +void compiler_subgroup_buffer_block_write_us1(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_write.cl", + "compiler_subgroup_buffer_block_write_us1", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 1); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_us1); +void compiler_subgroup_buffer_block_write_us2(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_write.cl", + "compiler_subgroup_buffer_block_write_us2", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 2); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_us2); +void compiler_subgroup_buffer_block_write_us4(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_write.cl", + "compiler_subgroup_buffer_block_write_us4", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 4); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_us4); +void compiler_subgroup_buffer_block_write_us8(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_write.cl", + "compiler_subgroup_buffer_block_write_us8", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 8); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_us8); diff --git a/utests/compiler_subgroup_image_block_read.cpp b/utests/compiler_subgroup_image_block_read.cpp index 02c8f07..879d622 100644 --- a/utests/compiler_subgroup_image_block_read.cpp +++ b/utests/compiler_subgroup_image_block_read.cpp @@ -21,7 +21,7 @@ static void compute_expected(T* input, { for(uint32_t i = 0; i < WG_GLOBAL_SIZE; i++) for(uint32_t j = 0; j < VEC_SIZE; j++) - expected[i * VEC_SIZE + j] = input[WG_GLOBAL_SIZE * j + i]; + expected[i * VEC_SIZE + j] = input[WG_GLOBAL_SIZE * 4 / sizeof(T) * j + i]; } /* @@ -33,7 +33,8 @@ static void generate_data(T* &input, size_t VEC_SIZE) { /* allocate input and expected arrays */ - input = new T[WG_GLOBAL_SIZE * VEC_SIZE]; + int* input_ui = new int[WG_GLOBAL_SIZE * VEC_SIZE]; + input = (T*)input_ui; expected = new T[WG_GLOBAL_SIZE * VEC_SIZE]; /* base value for all data types */ @@ -45,19 +46,22 @@ static void generate_data(T* &input, #if DEBUG_STDOUT cout << endl << "IN: " << endl; #endif + uint32_t rpitch = sizeof(uint32_t) * WG_GLOBAL_SIZE / sizeof(T); /* generate inputs and expected values */ - for(uint32_t gid = 0; gid < WG_GLOBAL_SIZE * VEC_SIZE; gid++) - { - /* initially 0, augment after */ - input[gid] = ((rand() % 2 - 1) * base_val) + (rand() % 112); + for(uint32_t h = 0; h < VEC_SIZE; ++h) { + for(uint32_t w = 0; w < WG_GLOBAL_SIZE; ++w) + { + /* initially 0, augment after */ + input[w + h * rpitch] = ((rand() % 2 - 1) * base_val) + (rand() % 112); + //input[w + h * rpitch] = w + h * WG_GLOBAL_SIZE; #if DEBUG_STDOUT - /* output generated input */ - cout << setw(4) << input[gid] << ", " ; - if((gid + 1) % 8 == 0) - cout << endl; + /* output generated input */ + cout << setw(4) << input[w + h * rpitch] << ", " ; + if((w+ 1) % 8 == 0) + cout << endl; #endif - + } } /* expected values */ compute_expected(input, expected, VEC_SIZE); @@ -151,47 +155,95 @@ static void subgroup_generic(T* input, /* * sub_group image block read functions */ -void compiler_subgroup_image_block_read1(void) +void compiler_subgroup_image_block_read_ui1(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read", - "compiler_subgroup_image_block_read1"); + "compiler_subgroup_image_block_read_ui1"); subgroup_generic(input, expected, 1); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read1); -void compiler_subgroup_image_block_read2(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_ui1); +void compiler_subgroup_image_block_read_ui2(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read", - "compiler_subgroup_image_block_read2"); + "compiler_subgroup_image_block_read_ui2"); subgroup_generic(input, expected, 2); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read2); -void compiler_subgroup_image_block_read4(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_ui2); +void compiler_subgroup_image_block_read_ui4(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read", - "compiler_subgroup_image_block_read4"); + "compiler_subgroup_image_block_read_ui4"); subgroup_generic(input, expected, 4); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read4); -void compiler_subgroup_image_block_read8(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_ui4); +void compiler_subgroup_image_block_read_ui8(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read", - "compiler_subgroup_image_block_read8"); + "compiler_subgroup_image_block_read_ui8"); + subgroup_generic(input, expected, 8); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_ui8); +void compiler_subgroup_image_block_read_us1(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl", + "compiler_subgroup_image_block_read_us1", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 1); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_us1); +void compiler_subgroup_image_block_read_us2(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl", + "compiler_subgroup_image_block_read_us2", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 2); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_us2); +void compiler_subgroup_image_block_read_us4(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl", + "compiler_subgroup_image_block_read_us4", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 4); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_us4); +void compiler_subgroup_image_block_read_us8(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl", + "compiler_subgroup_image_block_read_us8", + SOURCE, "-DSHORT"); subgroup_generic(input, expected, 8); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read8); +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_us8); diff --git a/utests/compiler_subgroup_image_block_write.cpp b/utests/compiler_subgroup_image_block_write.cpp index 2b85167..98cbb0f 100644 --- a/utests/compiler_subgroup_image_block_write.cpp +++ b/utests/compiler_subgroup_image_block_write.cpp @@ -50,6 +50,7 @@ static void generate_data(T* &input, { /* initially 0, augment after */ input[gid] = ((rand() % 2 - 1) * base_val) + (rand() % 112); + //input[gid] = gid; #if DEBUG_STDOUT /* output generated input */ @@ -155,47 +156,95 @@ static void subgroup_generic(T* input, /* * sub_group image block write functions */ -void compiler_subgroup_image_block_write1(void) +void compiler_subgroup_image_block_write_ui1(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write", - "compiler_subgroup_image_block_write1"); + "compiler_subgroup_image_block_write_ui1"); subgroup_generic(input, expected, 1); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write1); -void compiler_subgroup_image_block_write2(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_ui1); +void compiler_subgroup_image_block_write_ui2(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write", - "compiler_subgroup_image_block_write2"); + "compiler_subgroup_image_block_write_ui2"); subgroup_generic(input, expected, 2); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write2); -void compiler_subgroup_image_block_write4(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_ui2); +void compiler_subgroup_image_block_write_ui4(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write", - "compiler_subgroup_image_block_write4"); + "compiler_subgroup_image_block_write_ui4"); subgroup_generic(input, expected, 4); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write4); -void compiler_subgroup_image_block_write8(void) +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_ui4); +void compiler_subgroup_image_block_write_ui8(void) { if(!cl_check_subgroups()) return; cl_uint *input = NULL; cl_uint *expected = NULL; OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write", - "compiler_subgroup_image_block_write8"); + "compiler_subgroup_image_block_write_ui8"); subgroup_generic(input, expected, 8); } -MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write8); +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_ui8); +void compiler_subgroup_image_block_write_us1(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_write.cl", + "compiler_subgroup_image_block_write_us1", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 1); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_us1); +void compiler_subgroup_image_block_write_us2(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_write.cl", + "compiler_subgroup_image_block_write_us2", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 2); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_us2); +void compiler_subgroup_image_block_write_us4(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_write.cl", + "compiler_subgroup_image_block_write_us4", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 4); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_us4); +void compiler_subgroup_image_block_write_us8(void) +{ + if(!cl_check_subgroups_short()) + return; + cl_ushort *input = NULL; + cl_ushort *expected = NULL; + OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_write.cl", + "compiler_subgroup_image_block_write_us8", + SOURCE, "-DSHORT"); + subgroup_generic(input, expected, 8); +} +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_us8); -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet