Hi Grigore, I found what is wrong with broadcast 2D and 3D, see inline comments. You are useing int for the input date!
Thanks Xiuli On Mon, Apr 11, 2016 at 05:40:56PM +0300, Grigore Lupescu wrote: > From: Grigore Lupescu <grigore.lupescu at intel.com> > > Added the following unit tests: > compiler_workgroup_broadcast_1D_int > compiler_workgroup_broadcast_1D_long > compiler_workgroup_broadcast_2D_int > compiler_workgroup_broadcast_2D_long > compiler_workgroup_broadcast_3D_int > compiler_workgroup_broadcast_3D_long > > Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com> > --- > kernels/compiler_workgroup_broadcast.cl | 128 ++++++++++++- > utests/compiler_workgroup_broadcast.cpp | 319 > +++++++++++++++++++++++++++++--- > 2 files changed, 410 insertions(+), 37 deletions(-) > > diff --git a/kernels/compiler_workgroup_broadcast.cl > b/kernels/compiler_workgroup_broadcast.cl > index 4df74e3..47ff0b7 100644 > --- a/kernels/compiler_workgroup_broadcast.cl > +++ b/kernels/compiler_workgroup_broadcast.cl > @@ -1,9 +1,121 @@ > -kernel void compiler_workgroup_broadcast(global uint *src, global uint *dst) > { > - uint val = src[get_group_id(0)*(get_local_size(1) * get_local_size(0)) > - + get_group_id(1)*(get_local_size(1) * get_local_size(0) * > get_num_groups(0)) > - + get_local_id(1)* get_local_size(0) + get_local_id(0)]; > - uint bv = work_group_broadcast(val, 8, 3); > - dst[get_group_id(0)*(get_local_size(1) * get_local_size(0)) > - + get_group_id(1)*(get_local_size(1) * get_local_size(0) * > get_num_groups(0)) > - + get_local_id(1)* get_local_size(0) + get_local_id(0)] = bv; > +/* > + * Workgroup broadcast 1D functions > + */ > + > +kernel void compiler_workgroup_broadcast_1D_int(global int *src, > + global int *dst, > + uint wg_local_x, > + uint wg_local_y, > + uint wg_local_z) > +{ > + uint offset = 0; > + uint index = offset + get_global_id(0); > + > + int val = src[index]; > + int broadcast_val = work_group_broadcast(val, > + wg_local_x); > + dst[index] = broadcast_val; > +} > + > +kernel void compiler_workgroup_broadcast_1D_long(global long *src, > + global long *dst, > + uint wg_local_x, > + uint wg_local_y, > + uint wg_local_z) > +{ > + uint offset = 0; > + uint index = offset + get_global_id(0); > + > + long val = src[index]; > + long broadcast_val = work_group_broadcast(val, > + wg_local_x); > + dst[index] = broadcast_val; > +} > + > +/* > + * Workgroup broadcast 2D functions > + */ > +kernel void compiler_workgroup_broadcast_2D_int(global int *src, > + global int *dst, > + uint wg_local_x, > + uint wg_local_y, > + uint wg_local_z) > +{ > + uint lsize = get_local_size(0) * get_local_size(1); > + uint offset = get_group_id(0) * lsize + > + get_group_id(1) * get_num_groups(0) * lsize; > + uint index = offset + get_local_id(0) + > + get_local_id(1) * get_local_size(0); > + > + int val = src[index]; > + int broadcast_val = work_group_broadcast(val, > + wg_local_x, > + wg_local_y); > + dst[index] = broadcast_val; > +} > + > +kernel void compiler_workgroup_broadcast_2D_long(global long *src, > + global long *dst, > + uint wg_local_x, > + uint wg_local_y, > + uint wg_local_z) > +{ > + uint lsize = get_local_size(0) * get_local_size(1); > + uint offset = get_group_id(0) * lsize + > + get_group_id(1) * get_num_groups(0) * lsize; > + uint index = offset + get_local_id(0) + > + get_local_id(1) * get_local_size(0); > + > + long val = src[index]; > + long broadcast_val = work_group_broadcast(val, > + wg_local_x, > + wg_local_y); > + dst[index] = broadcast_val; > +} > + > +/* > + * Workgroup broadcast 3D functions > + */ > +kernel void compiler_workgroup_broadcast_3D_int(global int *src, > + global int *dst, > + uint wg_local_x, > + uint wg_local_y, > + uint wg_local_z) > +{ > + uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2); > + uint offset = get_group_id(0) * lsize + > + get_group_id(1) * get_num_groups(0) * lsize + > + get_group_id(2) * get_num_groups(1) * get_num_groups(0) * lsize; > + uint index = offset + get_local_id(0) + > + get_local_id(1) * get_local_size(0) + > + get_local_id(2) * get_local_size(1) * get_local_size(0); > + > + int val = src[index]; > + int broadcast_val = work_group_broadcast(val, > + wg_local_x, > + wg_local_y, > + wg_local_z); > + dst[index] = broadcast_val; > +} > + > +kernel void compiler_workgroup_broadcast_3D_long(global long *src, > + global long *dst, > + uint wg_local_x, > + uint wg_local_y, > + uint wg_local_z) > +{ > + uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2); > + uint offset = get_group_id(0) * lsize + > + get_group_id(1) * get_num_groups(0) * lsize + > + get_group_id(2) * get_num_groups(0) * get_num_groups(1) * lsize; > + uint index = offset + get_local_id(0) + > + get_local_id(1) * get_local_size(0) + > + get_local_id(2) * get_local_size(1) * get_local_size(0); > + > + long val = src[index]; > + long broadcast_val = work_group_broadcast(val, > + wg_local_x, > + wg_local_y, > + wg_local_z); > + dst[index] = broadcast_val; > } > diff --git a/utests/compiler_workgroup_broadcast.cpp > b/utests/compiler_workgroup_broadcast.cpp > index d45e5d8..aff95ff 100644 > --- a/utests/compiler_workgroup_broadcast.cpp > +++ b/utests/compiler_workgroup_broadcast.cpp > @@ -3,45 +3,306 @@ > #include <iostream> > #include "utest_helper.hpp" > > -void compiler_workgroup_broadcast(void) > +using namespace std; > + > +/* set to 1 for debug, output of input-expected data */ > +#define DEBUG_STDOUT 0 > + > +/* NDRANGE */ > +#define WG_GLOBAL_SIZE_X 16 > +#define WG_GLOBAL_SIZE_Y 4 > +#define WG_GLOBAL_SIZE_Z 4 > + > +#define WG_LOCAL_SIZE_X 16 > +#define WG_LOCAL_SIZE_Y 2 > +#define WG_LOCAL_SIZE_Z 2 > + > +/* TODO debug bellow case, lid2 always stays 0, instead of 0 and 1 > + * > + * #define WG_GLOBAL_SIZE_X 16 > + * #define WG_GLOBAL_SIZE_Y 1 > + * #define WG_GLOBAL_SIZE_Z 4 > + * > + * #define WG_LOCAL_SIZE_X 16 > + * #define WG_LOCAL_SIZE_Y 1 > + * #define WG_LOCAL_SIZE_Z 2 > + */ > + > +#define WG_LOCAL_X 5 > +#define WG_LOCAL_Y 0 > +#define WG_LOCAL_Z 0 > + > +enum WG_BROADCAST > +{ > + WG_BROADCAST_1D, > + WG_BROADCAST_2D, > + WG_BROADCAST_3D > +}; > + > +/* > + * Generic compute-expected function for op BROADCAST type > + * and any variable type > + */ > +template<class T> > +static void compute_expected(WG_BROADCAST wg_broadcast, > + T* input, > + T* expected, > + uint32_t wg_global_size, > + uint32_t wg_local_size) > +{ > + if(wg_broadcast == WG_BROADCAST_1D) > + { > + for(uint32_t i = 0; i < wg_local_size; i++) > + expected[i] = input[WG_LOCAL_X]; > + } > + else if(wg_broadcast == WG_BROADCAST_2D) > + { > + for(uint32_t i = 0; i < wg_local_size; i++) > + expected[i] = > + input[WG_LOCAL_X + > + WG_LOCAL_Y * WG_LOCAL_SIZE_X]; > + } > + else if(wg_broadcast == WG_BROADCAST_3D) > + { > + for(uint32_t i = 0; i < wg_local_size; i++) > + expected[i] = > + input[WG_LOCAL_X + > + WG_LOCAL_Y * WG_LOCAL_SIZE_X + > + WG_LOCAL_Z * WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y]; > + } > +} > + > +/* > + * Generic input-expected generate function for op BROADCAST type > + * and any variable type > + */ > +template<class T> > +static void generate_data(WG_BROADCAST wg_broadcast, > + T* &input, > + T* &expected, > + uint32_t &wg_global_size, > + uint32_t &wg_local_size) > +{ > + if(wg_broadcast == WG_BROADCAST_1D) > + { > + wg_global_size = WG_GLOBAL_SIZE_X; > + wg_local_size = WG_LOCAL_SIZE_X; > + } > + else if(wg_broadcast == WG_BROADCAST_2D) > + { > + wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y; > + wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y; > + } > + else if(wg_broadcast == WG_BROADCAST_3D) > + { > + wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y * WG_GLOBAL_SIZE_Z; > + wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y * WG_LOCAL_SIZE_Z; > + } > + > + /* allocate input and expected arrays */ > + input = new T[wg_global_size]; > + expected = new T[wg_global_size]; > + > + /* base value for all data types */ > + T base_val = (long)7 << (sizeof(T) * 5 - 3); > + > + /* seed for random inputs */ > + srand (time(NULL)); > + > + /* generate inputs and expected values */ > + for(uint32_t gid = 0; gid < wg_global_size; gid += wg_local_size) > + { > +#if DEBUG_STDOUT > + cout << endl << "IN: " << endl; > +#endif > + > + /* input values */ > + for(uint32_t lid = 0; lid < wg_local_size; lid++) > + { > + /* initially 0, augment after */ > + input[gid + lid] = 0; > + > + /* check all data types, test ideal for QWORD types */ > + input[gid + lid] += ((rand() % 2 - 1) * base_val); > + /* add trailing random bits, tests GENERAL cases */ > + input[gid + lid] += (rand() % 112); > + > +#if DEBUG_STDOUT > + /* output generated input */ > + cout << setw(4) << input[gid + lid] << ", " ; > + if((lid + 1) % 8 == 0) > + cout << endl; > +#endif > + } > + > + /* expected values */ > + compute_expected(wg_broadcast, input + gid, expected + gid, > wg_global_size, wg_local_size); > + > +#if DEBUG_STDOUT > + /* output expected input */ > + cout << endl << "EXP: " << endl; > + for(uint32_t lid = 0; lid < wg_local_size; lid++){ > + cout << setw(4) << expected[gid + lid] << ", " ; > + if((lid + 1) % 8 == 0) > + cout << endl; > + } > +#endif > + > + } > +} > + > +/* > + * Generic workgroup utest function for op BROADCAST type > + * and any variable type > + */ > +template<class T> > +static void workgroup_generic(WG_BROADCAST wg_broadcast, > + T* input, > + T* expected) > { > - const size_t n0 = 32; > - const size_t n1 = 16; > - const size_t n = n0 * n1; > - uint32_t src[n]; > + uint32_t wg_global_size = 0; > + uint32_t wg_local_size = 0; > > - // Setup kernel and buffers > - OCL_CREATE_KERNEL("compiler_workgroup_broadcast"); > - OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); > - OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); > + cl_uint wg_local_x = WG_LOCAL_X; > + cl_uint wg_local_y = WG_LOCAL_Y; > + cl_uint wg_local_z = WG_LOCAL_Z; > + > + /* input and expected data */ > + generate_data(wg_broadcast, input, expected, wg_global_size, > wg_local_size); > + > + /* prepare input for datatype */ > + OCL_CREATE_BUFFER(buf[0], 0, wg_global_size * sizeof(T), NULL); > + OCL_CREATE_BUFFER(buf[1], 0, wg_global_size * sizeof(T), NULL); > OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); > OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); > - globals[0] = n0; > - globals[1] = n1; > - locals[0] = 16; > - locals[1] = 16; > + OCL_SET_ARG(2, sizeof(cl_uint), &wg_local_x); > + OCL_SET_ARG(3, sizeof(cl_uint), &wg_local_y); > + OCL_SET_ARG(4, sizeof(cl_uint), &wg_local_z); > > - for (int32_t i = 0; i < (int32_t) n; ++i) { > - src[i] = i; > - } > + /* set input data for GPU */ > OCL_MAP_BUFFER(0); > - memcpy(buf_data[0], src, sizeof(src)); > + memcpy(buf_data[0], input, wg_global_size * sizeof(T)); > OCL_UNMAP_BUFFER(0); > > - // Run the kernel on GPU > - OCL_NDRANGE(2); > - > - // Compare > - OCL_MAP_BUFFER(1); > - for (int32_t i = 0; i < (int32_t) n/2; ++i) { > -// printf("%u ", ((uint32_t *)buf_data[1])[i]); > - OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 56); > + /* run the kernel on GPU */ > + if(wg_broadcast == WG_BROADCAST_1D) > + { > + globals[0] = WG_GLOBAL_SIZE_X; > + locals[0] = WG_LOCAL_SIZE_X; > + OCL_NDRANGE(1); > + } > + else if(wg_broadcast == WG_BROADCAST_2D) > + { > + globals[0] = WG_GLOBAL_SIZE_X; > + locals[0] = WG_LOCAL_SIZE_X; > + globals[1] = WG_GLOBAL_SIZE_Y; > + locals[1] = WG_LOCAL_SIZE_Y; > + OCL_NDRANGE(2); > } > - for (int32_t i = n/2; i < (int32_t) n; ++i) { > - // printf("%u ", ((uint32_t *)buf_data[1])[i]); > - OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 312); > + else if(wg_broadcast == WG_BROADCAST_3D) > + { > + globals[0] = WG_GLOBAL_SIZE_X; > + locals[0] = WG_LOCAL_SIZE_X; > + globals[1] = WG_GLOBAL_SIZE_Y; > + locals[1] = WG_LOCAL_SIZE_Y; > + globals[2] = WG_GLOBAL_SIZE_Z; > + locals[2] = WG_LOCAL_SIZE_Y; > + OCL_NDRANGE(3); > } > + > + /* check if mismatch */ > + OCL_MAP_BUFFER(1); > + uint32_t mismatches = 0; > + > + for (uint32_t i = 0; i < wg_global_size; i++) > + if(((T *)buf_data[1])[i] != *(expected + i)) > + { > + /* found mismatch, increment */ > + mismatches++; > + > +#if DEBUG_STDOUT > + /* output mismatch */ > + cout << "Err at " << i << ", " << > + ((T *)buf_data[1])[i] << " != " << *(expected + i) << endl; > +#endif > + } > + > +#if DEBUG_STDOUT > + /* output mismatch count */ > + cout << "mismatches " << mismatches << endl; > +#endif > + > OCL_UNMAP_BUFFER(1); > + > + OCL_ASSERT(mismatches == 0); > +} > + > +/* > + * Workgroup broadcast 1D functions > + */ > +void compiler_workgroup_broadcast_1D_int(void) > +{ > + cl_int *input = NULL; > + cl_int *expected = NULL; > + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", > + "compiler_workgroup_broadcast_1D_int"); > + workgroup_generic(WG_BROADCAST_1D, input, expected); > } > +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_int); > > -MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast); > +void compiler_workgroup_broadcast_1D_long(void) > +{ > + cl_long *input = NULL; > + cl_long *expected = NULL; > + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", > + "compiler_workgroup_broadcast_1D_long"); > + workgroup_generic(WG_BROADCAST_1D, input, expected); > +} > +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_long); > + > +/* > + * Workgroup broadcast 2D functions > + */ > +void compiler_workgroup_broadcast_2D_int(void) > +{ > + cl_int *input = NULL; > + cl_int *expected = NULL; > + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", > + "compiler_workgroup_broadcast_2D_int"); > + workgroup_generic(WG_BROADCAST_2D, input, expected); > +} > +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_int); > + > +void compiler_workgroup_broadcast_2D_long(void) > +{ > + cl_int *input = NULL; > + cl_int *expected = NULL; cl_long here! > + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", > + "compiler_workgroup_broadcast_2D_long"); > + workgroup_generic(WG_BROADCAST_2D, input, expected); > +} > +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_long); > + > + > +/* > + * Workgroup broadcast 3D functions > + */ > +void compiler_workgroup_broadcast_3D_int(void) > +{ > + cl_int *input = NULL; > + cl_int *expected = NULL; > + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", > + "compiler_workgroup_broadcast_3D_int"); > + workgroup_generic(WG_BROADCAST_3D, input, expected); > +} > +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_int); > + > +void compiler_workgroup_broadcast_3D_long(void) > +{ > + cl_int *input = NULL; > + cl_int *expected = NULL; cl_long here, too! > + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", > + "compiler_workgroup_broadcast_3D_long"); > + workgroup_generic(WG_BROADCAST_3D, input, expected); > +} > +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_long); > -- > 2.5.0 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet