LGTM, will push later. Thanks.
On Mon, Oct 21, 2013 at 03:47:56PM +0800, Yang Rong wrote: > > Signed-off-by: Yang Rong <rong.r.y...@intel.com> > --- > kernels/compiler_async_copy.cl | 38 +++++++++++-------- > utests/compiler_async_copy.cpp | 86 > +++++++++++++++++++++++++----------------- > 2 files changed, 74 insertions(+), 50 deletions(-) > > diff --git a/kernels/compiler_async_copy.cl b/kernels/compiler_async_copy.cl > index a2432a4..06ec8e7 100644 > --- a/kernels/compiler_async_copy.cl > +++ b/kernels/compiler_async_copy.cl > @@ -1,16 +1,24 @@ > -__kernel void > -compiler_async_copy(__global int2 *dst, __global int2 *src, __local int2 > *localBuffer, int copiesPerWorkItem) > -{ > - event_t event; > - int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0); > - int i; > - event = async_work_group_copy((__local int2*)localBuffer, (__global const > int2*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, > (event_t)0 ); > - wait_group_events( 1, &event ); > - > - for(i=0; i<copiesPerWorkItem; i++) > - localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ > get_local_id( 0 )*copiesPerWorkItem+i ] + (int2)(3, 3); > - barrier(CLK_LOCAL_MEM_FENCE); > - > - event = async_work_group_copy((__global > int2*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const > int2*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 ); > - wait_group_events( 1, &event ); > +#define DEF(TYPE) \ > +kernel void \ > +compiler_async_copy_##TYPE(__global TYPE *dst, __global TYPE *src, __local > TYPE *localBuffer, int copiesPerWorkItem) \ > +{ \ > + event_t event; \ > + int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0); \ > + int i; \ > + event = async_work_group_copy((__local TYPE*)localBuffer, (__global const > TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, > (event_t)0 ); \ > + wait_group_events( 1, &event ); \ > +\ > + event = async_work_group_copy((__global > TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const > 
TYPE*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 ); \ > + wait_group_events( 1, &event ); \ > } > + > +DEF(char2); > +DEF(uchar2); > +DEF(short2); > +DEF(ushort2); > +DEF(int2); > +DEF(uint2); > +DEF(long2); > +DEF(ulong2); > +DEF(float2); > +DEF(double2); > diff --git a/utests/compiler_async_copy.cpp b/utests/compiler_async_copy.cpp > index 9384f85..7951ff7 100644 > --- a/utests/compiler_async_copy.cpp > +++ b/utests/compiler_async_copy.cpp > @@ -1,39 +1,55 @@ > #include "utest_helper.hpp" > +#include <stdint.h> > > -static void compiler_async_copy(void) > -{ > - const size_t n = 1024; > - const size_t local_size = 32; > - const int copiesPerWorkItem = 5; > +typedef unsigned char uchar; > +typedef unsigned short ushort; > > - // Setup kernel and buffers > - OCL_CREATE_KERNEL("compiler_async_copy"); > - OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(int) * 2, > NULL); > - OCL_CREATE_BUFFER(buf[1], 0, n * copiesPerWorkItem * sizeof(int) * 2, > NULL); > - OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); > - OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); > - OCL_SET_ARG(2, local_size*copiesPerWorkItem*sizeof(int)*2, NULL); > - OCL_SET_ARG(3, sizeof(int), &copiesPerWorkItem); > +#define DEF(TYPE, KER_TYPE, VEC_SIZE) \ > +static void compiler_async_copy_##KER_TYPE##VEC_SIZE(void) \ > +{ \ > + const size_t n = 1024; \ > + const size_t local_size = 32; \ > + const int copiesPerWorkItem = 5; \ > +\ > + /* Setup kernel and buffers */\ > + OCL_CREATE_KERNEL_FROM_FILE("compiler_async_copy", "compiler_async_copy_" > # KER_TYPE # VEC_SIZE); \ > + OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(TYPE) * > VEC_SIZE, NULL); \ > + OCL_CREATE_BUFFER(buf[1], 0, n * copiesPerWorkItem * sizeof(TYPE) * > VEC_SIZE, NULL); \ > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \ > + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \ > + OCL_SET_ARG(2, local_size*copiesPerWorkItem*sizeof(TYPE)*VEC_SIZE, NULL); \ > + OCL_SET_ARG(3, sizeof(int), &copiesPerWorkItem); \ > +\ > + 
OCL_MAP_BUFFER(1); \ > + for (uint32_t i = 0; i < n * copiesPerWorkItem * VEC_SIZE; ++i) \ > + ((TYPE*)buf_data[1])[i] = rand(); \ > + OCL_UNMAP_BUFFER(1); \ > +\ > + /* Run the kernel */\ > + globals[0] = n; \ > + locals[0] = local_size; \ > + OCL_NDRANGE(1); \ > + OCL_MAP_BUFFER(0); \ > + OCL_MAP_BUFFER(1); \ > +\ > + /* Check results */\ > + TYPE *dst = (TYPE*)buf_data[0]; \ > + TYPE *src = (TYPE*)buf_data[1]; \ > + for (uint32_t i = 0; i < n * copiesPerWorkItem * VEC_SIZE; i++) \ > + OCL_ASSERT(dst[i] == src[i]); \ > + OCL_UNMAP_BUFFER(0); \ > + OCL_UNMAP_BUFFER(1); \ > +} \ > +\ > +MAKE_UTEST_FROM_FUNCTION(compiler_async_copy_##KER_TYPE##VEC_SIZE); > > - OCL_MAP_BUFFER(1); > - for (uint32_t i = 0; i < n * copiesPerWorkItem * 2; ++i) > - ((int*)buf_data[1])[i] = rand(); > - OCL_UNMAP_BUFFER(1); > - > - // Run the kernel > - globals[0] = n; > - locals[0] = local_size; > - OCL_NDRANGE(1); > - OCL_MAP_BUFFER(0); > - OCL_MAP_BUFFER(1); > - > - // Check results > - int *dst = (int*)buf_data[0]; > - int *src = (int*)buf_data[1]; > - for (uint32_t i = 0; i < n * copiesPerWorkItem * 2; i++) > - OCL_ASSERT(dst[i] == src[i] + 3); > - OCL_UNMAP_BUFFER(0); > - OCL_UNMAP_BUFFER(1); > -} > - > -MAKE_UTEST_FROM_FUNCTION(compiler_async_copy); > +DEF(char, char, 2); > +DEF(uchar, uchar, 2); > +DEF(short, short, 2); > +DEF(ushort, ushort, 2); > +DEF(int, int, 2); > +DEF(uint, uint, 2); > +DEF(int64_t, long, 2); > +DEF(uint64_t, ulong, 2); > +DEF(float, float, 2); > +DEF(double, double, 2); > -- > 1.8.1.2 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet