Hi Junyan, Please help to review this patch set. Thanks. Luo Xionghu Best Regards
-----Original Message----- From: Luo, Xionghu Sent: Thursday, May 08, 2014 12:49 PM To: beignet@lists.freedesktop.org Cc: Luo, Xionghu Subject: [PATCH 1/3] [opencl-1.2] move enqueue_copy_image kernels outside of runtime code. From: Luo <xionghu....@intel.com> seperate the kernel code from host code to make it clean; build the kernels offline by gbe_bin_generator to improve the performance. --- src/CMakeLists.txt | 25 ++- src/cl_context.h | 16 +- src/cl_gt_device.h | 23 ++- src/cl_mem.c | 214 ++++++--------------- src/kernels/cl_internal_copy_buf_align1.cl | 8 - src/kernels/cl_internal_copy_buf_align16.cl | 2 +- src/kernels/cl_internal_copy_buf_align4.cl | 2 +- src/kernels/cl_internal_copy_buf_rect.cl | 15 ++ .../cl_internal_copy_buf_unalign_dst_offset.cl | 2 +- .../cl_internal_copy_buf_unalign_same_offset.cl | 2 +- .../cl_internal_copy_buf_unalign_src_offset.cl | 2 +- src/kernels/cl_internal_copy_buffer_to_image_2d.cl | 18 ++ src/kernels/cl_internal_copy_buffer_to_image_3d.cl | 19 ++ src/kernels/cl_internal_copy_image_2d_to_2d.cl | 21 ++ src/kernels/cl_internal_copy_image_2d_to_3d.cl | 22 +++ src/kernels/cl_internal_copy_image_2d_to_buffer.cl | 19 ++ src/kernels/cl_internal_copy_image_3d_to_2d.cl | 22 +++ src/kernels/cl_internal_copy_image_3d_to_3d.cl | 23 +++ src/kernels/cl_internal_copy_image_3d_to_buffer.cl | 22 +++ 19 files changed, 301 insertions(+), 176 deletions(-) delete mode 100644 src/kernels/cl_internal_copy_buf_align1.cl create mode 100644 src/kernels/cl_internal_copy_buf_rect.cl create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_2d.cl create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_buffer.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3bac02..da7e1eb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,14 +17,33 @@ foreach (KF ${KERNEL_FILES}) endforeach (KF) endmacro (MakeKernelBinStr) +macro (MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES) + set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl) + set (file_content) + file (REMOVE ${output_file}) + foreach (KF ${KERNEL_NAMES}) + set (input_file ${KERNEL_PATH}/${KF}.cl) + file(READ ${input_file} file_content ) + STRING(REGEX REPLACE ";" "\\\\;" file_content "${file_content}") + file(APPEND ${output_file} ${file_content}) + endforeach (KF) +endmacro (MakeBuiltInKernelStr) + set (KERNEL_STR_FILES) -set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 +set (KERNEL_NAMES cl_internal_copy_buf_align4 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset -cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 -cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign +cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d +cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d +cl_internal_copy_image_3d_to_3d cl_internal_copy_image_2d_to_buffer +cl_internal_copy_image_3d_to_buffer +cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d +cl_internal_fill_buf_unalign cl_internal_fill_buf_align2 +cl_internal_fill_buf_align4 cl_internal_fill_buf_align8 cl_internal_fill_buf_align128) +set (BUILT_IN_NAME cl_internal_built_in_kernel) MakeBuiltInKernelStr +("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") +MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" +"${BUILT_IN_NAME}") set(OPENCL_SRC ${KERNEL_STR_FILES} diff --git a/src/cl_context.h b/src/cl_context.h index b2562ce..65b1728 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -47,14 +47,14 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, CL_ENQUEUE_COPY_BUFFER_RECT, - CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d - CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d - CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d - CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0, //copy image 2d to buffer - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1, //copy image 3d tobuffer - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0, //copy buffer to image 2d - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1, //copy buffer to image 3d + CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d + CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d + CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d + CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d + CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1 aligne pattern, pattern size=1 CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2 aligne pattern, pattern size=2 CL_ENQUEUE_FILL_BUFFER_ALIGN4, //fill buffer with 4 aligne pattern, pattern size=4 diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index 110988a..8ec6528 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -75,7 +75,28 @@ DECL_INFO_STRING(version, LIBCL_VERSION_STRING) DECL_INFO_STRING(profile, "FULL_PROFILE") DECL_INFO_STRING(opencl_c_version, LIBCL_C_VERSION_STRING) DECL_INFO_STRING(extensions, "") -DECL_INFO_STRING(built_in_kernels, "") +DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;" + "__cl_copy_region_align16;" + "__cl_cpy_region_unalign_same_offset;" + "__cl_copy_region_unalign_dst_offset;" + "__cl_copy_region_unalign_src_offset;" + "__cl_copy_buffer_rect;" + "__cl_copy_image_2d_to_2d;" + "__cl_copy_image_3d_to_2d;" + "__cl_copy_image_2d_to_3d;" + "__cl_copy_image_3d_to_3d;" + "__cl_copy_image_2d_to_buffer;" + "__cl_copy_image_3d_to_buffer;" + "__cl_copy_buffer_to_image_2d;" + "__cl_copy_buffer_to_image_3d;" + "__cl_fill_region_unalign;" + "__cl_fill_region_align2;" + "__cl_fill_region_align4;" + "__cl_fill_region_align8_2;" + "__cl_fill_region_align8_4;" + "__cl_fill_region_align8_8;" + "__cl_fill_region_align8_16;" + "__cl_fill_region_align128;") DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING) #undef DECL_INFO_STRING diff --git a/src/cl_mem.c b/src/cl_mem.c index 3f1b389..0687790 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -1018,33 +1018,19 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; - cl_int index = CL_ENQUEUE_COPY_BUFFER_RECT; cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; - static const char *str_kernel = - "kernel void __cl_cpy_buffer_rect ( \n" - " global char* src, global char* dst, \n" - " unsigned int region0, unsigned int region1, unsigned int region2, \n" - " unsigned int src_offset, unsigned int dst_offset, \n" - " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n" - " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n" - " int i = get_global_id(0); \n" - " int j = get_global_id(1); \n" - " int k = get_global_id(2); \n" - " if((i >= region0) || (j>= region1) || (k>=region2)) \n" - " return; \n" - " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n" - " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n" - " dst[dst_offset] = src[src_offset]; \n" - "}"; - - /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_buf->ctx == dst_buf->ctx); /* setup the kernel and run. */ - ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, NULL); + extern char cl_internal_copy_buf_rect_str[]; extern int + cl_internal_copy_buf_rect_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT, + cl_internal_copy_buf_rect_str, + (size_t)cl_internal_copy_buf_rect_str_size, NULL); + if (!ker) return CL_OUT_OF_RESOURCES; @@ -1073,8 +1059,6 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; - cl_int index = CL_ENQUEUE_COPY_IMAGE_0; - char option[40] = ""; uint32_t fixupDataType; uint32_t savedIntelFmt; @@ -1084,15 +1068,6 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; - if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { - strcat(option, "-D SRC_IMAGE_3D"); - index += 1; - } - if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { - strcat(option, " -D DST_IMAGE_3D"); - index += 2; - } - switch (src_image->fmt.image_channel_data_type) { case CL_SNORM_INT8: case CL_UNORM_INT8: fixupDataType = CL_UNSIGNED_INT8; break; @@ -1115,54 +1090,41 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image src_image->intel_fmt = cl_image_get_intel_format(&fmt); dst_image->intel_fmt = src_image->intel_fmt; } - static const char *str_kernel = - "#ifdef SRC_IMAGE_3D \n" - " #define SRC_IMAGE_TYPE image3d_t \n" - " #define SRC_COORD_TYPE int4 \n" - "#else \n" - " #define SRC_IMAGE_TYPE image2d_t \n" - " #define SRC_COORD_TYPE int2 \n" - "#endif \n" - "#ifdef DST_IMAGE_3D \n" - " #define DST_IMAGE_TYPE image3d_t \n" - " #define DST_COORD_TYPE int4 \n" - "#else \n" - " #define DST_IMAGE_TYPE image2d_t \n" - " #define DST_COORD_TYPE int2 \n" - "#endif \n" - "kernel void __cl_copy_image ( \n" - " __read_only SRC_IMAGE_TYPE src_image, __write_only DST_IMAGE_TYPE dst_image, \n" - " unsigned int region0, unsigned int region1, unsigned int region2, \n" - " unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n" - " unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { \n" - " int i = get_global_id(0); \n" - " int j = get_global_id(1); \n" - " int k = get_global_id(2); \n" - " int4 color; \n" - " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n" - " SRC_COORD_TYPE src_coord; \n" - " DST_COORD_TYPE dst_coord; \n" - " if((i >= region0) || (j>= region1) || (k>=region2)) \n" - " return; \n" - " src_coord.x = src_origin0 + i; \n" - " src_coord.y = src_origin1 + j; \n" - "#ifdef SRC_IMAGE_3D \n" - " src_coord.z = src_origin2 + k; \n" - "#endif \n" - " dst_coord.x = dst_origin0 + i; \n" - " dst_coord.y = dst_origin1 + j; \n" - "#ifdef DST_IMAGE_3D \n" - " dst_coord.z = dst_origin2 + k; \n" - "#endif \n" - " color = read_imagei(src_image, sampler, src_coord); \n" - " write_imagei(dst_image, dst_coord, color); \n" - "}"; /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_image->base.ctx == dst_image->base.ctx); /* setup the kernel and run. */ - ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); + if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) { + if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { + extern char cl_internal_copy_image_2d_to_2d_str[]; + extern int cl_internal_copy_image_2d_to_2d_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, + cl_internal_copy_image_2d_to_2d_str, (size_t)cl_internal_copy_image_2d_to_2d_str_size, NULL); + }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + extern char cl_internal_copy_image_2d_to_3d_str[]; + extern int cl_internal_copy_image_2d_to_3d_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, + cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL); + } + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { + extern char cl_internal_copy_image_3d_to_2d_str[]; + extern int cl_internal_copy_image_3d_to_2d_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, + cl_internal_copy_image_3d_to_2d_str, (size_t)cl_internal_copy_image_3d_to_2d_str_size, NULL); + }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + extern char cl_internal_copy_image_3d_to_3d_str[]; + extern int cl_internal_copy_image_3d_to_3d_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, + cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL); + } + } + if (!ker) { ret = CL_OUT_OF_RESOURCES; goto fail; @@ -1198,8 +1160,6 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; - cl_int index = CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0; - char option[40] = ""; uint32_t intel_fmt, bpp; cl_image_format fmt; size_t origin0, region0; @@ -1210,42 +1170,6 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; - if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { - strcat(option, "-D IMAGE_3D"); - index += 1; - } - - static const char *str_kernel = - "#ifdef IMAGE_3D \n" - " #define IMAGE_TYPE image3d_t \n" - " #define COORD_TYPE int4 \n" - "#else \n" - " #define IMAGE_TYPE image2d_t \n" - " #define COORD_TYPE int2 \n" - "#endif \n" - "kernel void __cl_copy_image_to_buffer ( \n" - " __read_only IMAGE_TYPE image, global uchar* buffer, \n" - " unsigned int region0, unsigned int region1, unsigned int region2, \n" - " unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n" - " unsigned int dst_offset) { \n" - " int i = get_global_id(0); \n" - " int j = get_global_id(1); \n" - " int k = get_global_id(2); \n" - " uint4 color; \n" - " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n" - " COORD_TYPE src_coord; \n" - " if((i >= region0) || (j>= region1) || (k>=region2)) \n" - " return; \n" - " src_coord.x = src_origin0 + i; \n" - " src_coord.y = src_origin1 + j; \n" - "#ifdef IMAGE_3D \n" - " src_coord.z = src_origin2 + k; \n" - "#endif \n" - " color = read_imageui(image, sampler, src_coord); \n" - " dst_offset += (k * region1 + j) * region0 + i; \n" - " buffer[dst_offset] = color.x; \n" - "}"; - /* We use one kernel to copy the data. The kernel is lazily created. */ assert(image->base.ctx == buffer->ctx); @@ -1261,7 +1185,20 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; /* setup the kernel and run. */ - ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); + if(image->image_type == CL_MEM_OBJECT_IMAGE2D) { + extern char cl_internal_copy_image_2d_to_buffer_str[]; + extern int cl_internal_copy_image_2d_to_buffer_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, + cl_internal_copy_image_2d_to_buffer_str, + (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL); }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { + extern char cl_internal_copy_image_3d_to_buffer_str[]; + extern int cl_internal_copy_image_3d_to_buffer_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, + cl_internal_copy_image_3d_to_buffer_str, + (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL); } + if (!ker) { ret = CL_OUT_OF_RESOURCES; goto fail; @@ -1297,8 +1234,6 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; - cl_int index = CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0; - char option[40] = ""; uint32_t intel_fmt, bpp; cl_image_format fmt; size_t origin0, region0; @@ -1309,41 +1244,6 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; - if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { - strcat(option, "-D IMAGE_3D"); - index += 1; - } - - static const char *str_kernel = - "#ifdef IMAGE_3D \n" - " #define IMAGE_TYPE image3d_t \n" - " #define COORD_TYPE int4 \n" - "#else \n" - " #define IMAGE_TYPE image2d_t \n" - " #define COORD_TYPE int2 \n" - "#endif \n" - "kernel void __cl_copy_image_to_buffer ( \n" - " __read_only IMAGE_TYPE image, global uchar* buffer, \n" - " unsigned int region0, unsigned int region1, unsigned int region2, \n" - " unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, \n" - " unsigned int src_offset) { \n" - " int i = get_global_id(0); \n" - " int j = get_global_id(1); \n" - " int k = get_global_id(2); \n" - " uint4 color = (uint4)(0); \n" - " COORD_TYPE dst_coord; \n" - " if((i >= region0) || (j>= region1) || (k>=region2)) \n" - " return; \n" - " dst_coord.x = dst_origin0 + i; \n" - " dst_coord.y = dst_origin1 + j; \n" - "#ifdef IMAGE_3D \n" - " dst_coord.z = dst_origin2 + k; \n" - "#endif \n" - " src_offset += (k * region1 + j) * region0 + i; \n" - " color.x = buffer[src_offset]; \n" - " write_imageui(image, dst_coord, color); \n" - "}"; - /* We use one kernel to copy the data. The kernel is lazily created. */ assert(image->base.ctx == buffer->ctx); @@ -1359,7 +1259,19 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; /* setup the kernel and run. */ - ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); + if(image->image_type == CL_MEM_OBJECT_IMAGE2D) { + extern char cl_internal_copy_buffer_to_image_2d_str[]; + extern int cl_internal_copy_buffer_to_image_2d_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, + cl_internal_copy_buffer_to_image_2d_str, + (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL); }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { + extern char cl_internal_copy_buffer_to_image_3d_str[]; + extern int cl_internal_copy_buffer_to_image_3d_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, + cl_internal_copy_buffer_to_image_3d_str, + (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL); } if (!ker) return CL_OUT_OF_RESOURCES; diff --git a/src/kernels/cl_internal_copy_buf_align1.cl b/src/kernels/cl_internal_copy_buf_align1.cl deleted file mode 100644 index cd3ec7b..0000000 --- a/src/kernels/cl_internal_copy_buf_align1.cl +++ /dev/null @@ -1,8 +0,0 @@ -kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset, - global char* dst, unsigned int dst_offset, - unsigned int size) -{ - int i = get_global_id(0); - if (i < size) - dst[i+dst_offset] = src[i+src_offset]; -} diff --git a/src/kernels/cl_internal_copy_buf_align16.cl b/src/kernels/cl_internal_copy_buf_align16.cl index 75b1a4a..1abb4e9 100644 --- a/src/kernels/cl_internal_copy_buf_align16.cl +++ b/src/kernels/cl_internal_copy_buf_align16.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset, +kernel void __cl_copy_region_align16 ( global float* src, unsigned int +src_offset, global float* dst, unsigned int dst_offset, unsigned int size) { diff --git a/src/kernels/cl_internal_copy_buf_align4.cl b/src/kernels/cl_internal_copy_buf_align4.cl index 44a0f81..27174ca 100644 --- a/src/kernels/cl_internal_copy_buf_align4.cl +++ b/src/kernels/cl_internal_copy_buf_align4.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset, +kernel void __cl_copy_region_align4 ( global float* src, unsigned int +src_offset, global float* dst, unsigned int dst_offset, unsigned int size) { diff --git a/src/kernels/cl_internal_copy_buf_rect.cl b/src/kernels/cl_internal_copy_buf_rect.cl new file mode 100644 index 0000000..71e7484 --- /dev/null +++ b/src/kernels/cl_internal_copy_buf_rect.cl @@ -0,0 +1,15 @@ +kernel void __cl_copy_buffer_rect ( global char* src, global char* dst, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_offset, unsigned int dst_offset, + unsigned int src_row_pitch, unsigned int src_slice_pitch, + unsigned int dst_row_pitch, +unsigned int dst_slice_pitch) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_offset += k * src_slice_pitch + j * src_row_pitch + i; + dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; + dst[dst_offset] = src[src_offset]; +} diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl index 13f4162..e02d0e5 100644 --- a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl +++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned int src_offset, +kernel void __cl_copy_region_unalign_dst_offset ( global int* src, +unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask, diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl index 8510246..83b6e97 100644 --- a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl +++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned int src_offset, +kernel void __cl_copy_region_unalign_same_offset ( global int* src, +unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask) diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl index f98368a..ce0aa1d 100644 --- a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl +++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned int src_offset, +kernel void __cl_copy_region_unalign_src_offset ( global int* src, +unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask, diff --git a/src/kernels/cl_internal_copy_buffer_to_image_2d.cl b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl new file mode 100644 index 0000000..a218b58 --- /dev/null +++ b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl @@ -0,0 +1,18 @@ +kernel void __cl_copy_buffer_to_image_2d(__read_only image2d_t image, global uchar* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, + unsigned int src_offset) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + uint4 color = (uint4)(0); + int2 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + src_offset += (k * region1 + j) * region0 + i; + color.x = buffer[src_offset]; + write_imageui(image, dst_coord, color); } diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d.cl b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl new file mode 100644 index 0000000..84d3b27 --- /dev/null +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl @@ -0,0 +1,19 @@ +kernel void __cl_copy_buffer_to_image_3d(__read_only image3d_t image, global uchar* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, + unsigned int src_offset) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + uint4 color = (uint4)(0); + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + src_offset += (k * region1 + j) * region0 + i; + color.x = buffer[src_offset]; + write_imageui(image, dst_coord, color); } diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d.cl b/src/kernels/cl_internal_copy_image_2d_to_2d.cl new file mode 100644 index 0000000..c5eaab1 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_to_2d.cl @@ -0,0 +1,21 @@ +kernel void __cl_copy_image_2d_to_2d(__read_only image2d_t src_image, __write_only image2d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int +dst_origin1, unsigned int dst_origin2) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + int2 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); } diff --git a/src/kernels/cl_internal_copy_image_2d_to_3d.cl b/src/kernels/cl_internal_copy_image_2d_to_3d.cl new file mode 100644 index 0000000..4c73a74 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_to_3d.cl @@ -0,0 +1,22 @@ +kernel void __cl_copy_image_2d_to_3d(__read_only image2d_t src_image, __write_only image3d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, +unsigned int dst_origin1, unsigned int dst_origin2) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); } diff --git a/src/kernels/cl_internal_copy_image_2d_to_buffer.cl b/src/kernels/cl_internal_copy_image_2d_to_buffer.cl new file mode 100644 index 0000000..b6c352e --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_to_buffer.cl @@ -0,0 +1,19 @@ +kernel void __cl_copy_image_2d_to_buffer( __read_only image2d_t image, global uchar* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_offset) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + uint4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + color = read_imageui(image, sampler, src_coord); + dst_offset += (k * region1 + j) * region0 + i; + buffer[dst_offset] = color.x; +} diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d.cl b/src/kernels/cl_internal_copy_image_3d_to_2d.cl new file mode 100644 index 0000000..e0effa0 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_3d_to_2d.cl @@ -0,0 +1,22 @@ +kernel void __cl_copy_image_3d_to_2d(__read_only image3d_t src_image, __write_only image2d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int +dst_origin1, unsigned int dst_origin2) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int4 src_coord; + int2 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); } diff --git a/src/kernels/cl_internal_copy_image_3d_to_3d.cl b/src/kernels/cl_internal_copy_image_3d_to_3d.cl new file mode 100644 index 0000000..de80a0a --- /dev/null +++ b/src/kernels/cl_internal_copy_image_3d_to_3d.cl @@ -0,0 +1,23 @@ +kernel void __cl_copy_image_3d_to_3d(__read_only image3d_t src_image, __write_only image3d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int +dst_origin1, unsigned int dst_origin2) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int4 src_coord; + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); } diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer.cl b/src/kernels/cl_internal_copy_image_3d_to_buffer.cl new file mode 100644 index 0000000..dcfc8a2 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer.cl @@ -0,0 +1,22 @@ +#define IMAGE_TYPE image3d_t +#define COORD_TYPE int4 +kernel void __cl_copy_image_3d_to_buffer ( __read_only IMAGE_TYPE image, global uchar* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_offset) { + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + uint4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + COORD_TYPE src_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + color = read_imageui(image, sampler, src_coord); + dst_offset += (k * region1 + j) * region0 + i; + buffer[dst_offset] = color.x; +} -- 1.8.1.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet