Could you please move the kernel source code to file instead of staying in host code? You can refer to my pending patch "move enqueue_copy_image kernels outside of runtime code", thanks.
Luo Xionghu Best Regards -----Original Message----- From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Yang, Rong R Sent: Monday, May 19, 2014 3:14 PM To: Lv, Meng; beignet@lists.freedesktop.org Cc: Lv, Meng Subject: Re: [Beignet] [PATCH] [PATCH]improve the clEnqueueCopyBufferRect performance in some cases The one index indicate to one kernel string, because you add a new kernel for CL_ENQUEUE_COPY_BUFFER_RECT, you should also add a new index for it. And the file mode change 100644 => 100755, I think it is not necessary. -----Original Message----- From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Lv Meng Sent: Monday, May 05, 2014 10:50 AM To: beignet@lists.freedesktop.org Cc: Lv, Meng Subject: [Beignet] [PATCH] [PATCH]improve the clEnqueueCopyBufferRect performance in some cases Signed-off-by: Lv Meng <meng...@intel.com> --- src/cl_mem.c | 80 ++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 21 deletions(-) mode change 100644 => 100755 src/cl_mem.c diff --git a/src/cl_mem.c b/src/cl_mem.c old mode 100644 new mode 100755 index 44482f7..92f51d0 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -911,6 +911,17 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_1}; + + // the src and dst mem rect is continuous, the copy is degraded to + buf copy if((region[0] == dst_row_pitch) && (region[0] == + src_row_pitch) && (region[1] * src_row_pitch == src_slice_pitch) && (region[1] * dst_row_pitch == dst_slice_pitch)){ + cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; + cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; + cl_int size = region[0]*region[1]*region[2]; + ret = cl_mem_copy(queue, src_buf, dst_buf,src_offset, dst_offset, size); + return ret; + } + if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; @@ -919,30 +930,57 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, cl_int index = CL_ENQUEUE_COPY_BUFFER_RECT; cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; - - static const char *str_kernel = - "kernel void __cl_cpy_buffer_rect ( \n" - " global char* src, global char* dst, \n" - " unsigned int region0, unsigned int region1, unsigned int region2, \n" - " unsigned int src_offset, unsigned int dst_offset, \n" - " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n" - " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n" - " int i = get_global_id(0); \n" - " int j = get_global_id(1); \n" - " int k = get_global_id(2); \n" - " if((i >= region0) || (j>= region1) || (k>=region2)) \n" - " return; \n" - " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n" - " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n" - " dst[dst_offset] = src[src_offset]; \n" - "}"; - - /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_buf->ctx == dst_buf->ctx); + if( (src_offset % 4== 0) && (dst_offset % 4== 0) && (src_row_pitch % 4== 0) && (dst_row_pitch % 4== 0) + && (src_slice_pitch % 4== 0) && (dst_slice_pitch % 4== 0) && (global_sz[0] % 4 == 0) ){ + global_sz[0] /= 4; + src_offset /= 4; + dst_offset /= 4; + src_row_pitch /= 4; + dst_row_pitch /= 4; + src_slice_pitch /= 4; + dst_slice_pitch /= 4; + static const char *str_intkernel = + "kernel void __cl_cpy_buffer_rect ( \n" + " global int* src, global int* dst, \n" + " unsigned int region0, unsigned int region1, unsigned int region2, \n" + " unsigned int src_offset, unsigned int dst_offset, \n" + " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n" + " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n" + " int i = get_global_id(0); \n" + " int j = get_global_id(1); \n" + " int k = get_global_id(2); \n" + " region0 >>= 2; \n" + " if((i >= region0) || (j>= region1) || (k>=region2)) \n" + " return; \n" + " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n" + " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n" + " dst[dst_offset] = src[src_offset]; \n" + "}"; + /* setup the kernel and run. */ + ker = cl_context_get_static_kernel(queue->ctx, index, + str_intkernel, NULL); } else { + static const char *str_kernel = + "kernel void __cl_cpy_buffer_rect ( \n" + " global char* src, global char* dst, \n" + " unsigned int region0, unsigned int region1, unsigned int region2, \n" + " unsigned int src_offset, unsigned int dst_offset, \n" + " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n" + " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n" + " int i = get_global_id(0); \n" + " int j = get_global_id(1); \n" + " int k = get_global_id(2); \n" + " if((i >= region0) || (j>= region1) || (k>=region2)) \n" + " return; \n" + " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n" + " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n" + " dst[dst_offset] = src[src_offset]; \n" + "}"; + /* setup the kernel and run. */ + ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, + NULL); } - /* setup the kernel and run. */ - ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, NULL); if (!ker) return CL_OUT_OF_RESOURCES; -- 1.8.3.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet