As the sample LD message doesn't support an array index, we have to create a 2D array surface backed by the same buffer object. Thus each 1D array image will have two surfaces bound to it: one at the image's own index and a second at 128 + index.
And then at kernel side, we will access the corresponding 2D array surface when the LD message is required otherwise will access the origin 1D array surface. Signed-off-by: Zhigang Gong <zhigang.g...@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 9 +- backend/src/ir/instruction.cpp | 2 +- backend/src/ocl_stdlib.tmpl.h | 161 +++++++++++++++++++---------- src/cl_api.c | 5 +- src/cl_command_queue.c | 5 + src/cl_device_id.c | 1 + src/cl_device_id.h | 1 + src/cl_gt_device.h | 1 + src/cl_mem.c | 29 +++--- src/intel/intel_gpgpu.c | 7 +- 10 files changed, 149 insertions(+), 72 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index ecb64cd..986aa3e 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -3606,10 +3606,15 @@ namespace gbe msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType()); msgLen = srcNum; } - uint32_t bti = insn.getImageIndex(); + // We switch to a fixup bti for linear filter on a image1d array sampling. + uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? 
128 : 0); + if (bti > 253) { + std::cerr << "Too large bti " << bti; + return false; + } uint32_t sampler = insn.getSamplerIndex(); - sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset()); + sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0); return true; } DECL_CTOR(SampleInstruction, 1, 1); diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index d081235..435869e 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -527,7 +527,7 @@ namespace ir { uint8_t srcIsFloat:1; uint8_t dstIsFloat:1; uint8_t samplerIdx:4; - uint8_t samplerOffset:1; + uint8_t samplerOffset:2; uint8_t imageIdx; static const uint32_t srcNum = 3; static const uint32_t dstNum = 4; diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 605d96d..c43172d 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -4566,24 +4566,18 @@ OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, in // 2D & 1D Array read OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset); OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset); OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset); OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset); OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, 
float u, int i, uint sampler_offset); OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset); // 3D & 2D Array read OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset); OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset); OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset); OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); // 1D write @@ -4606,6 +4600,9 @@ int __gen_ocl_get_image_height(uint surface_id); int __gen_ocl_get_image_channel_data_type(uint surface_id); int __gen_ocl_get_image_channel_order(uint surface_id); int __gen_ocl_get_image_depth(uint surface_id); +/* The printf function. 
*/ +int __gen_ocl_printf_stub(const char * format, ...); +#define printf __gen_ocl_printf_stub // 2D 3D Image Common Macro #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND @@ -4616,21 +4613,49 @@ int __gen_ocl_get_image_depth(uint surface_id); #define GET_IMAGE(cl_image, surface_id) \ uint surface_id = (uint)cl_image +INLINE_OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image) +{ + GET_IMAGE(image, surface_id); + float array_size = __gen_ocl_get_image_depth(surface_id); + return clamp(rint(index), 0.f, array_size - 1.f); +} + +INLINE_OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image) +{ + GET_IMAGE(image, surface_id); + float array_size = __gen_ocl_get_image_depth(surface_id); + return clamp(rint(index), 0.f, array_size - 1.f); +} + +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image) +{ + GET_IMAGE(image, surface_id); + int array_size = __gen_ocl_get_image_depth(surface_id); + return clamp(index, 0, array_size - 1); +} -#define DECL_READ_IMAGE0(int_clamping_fix, \ +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image) +{ + GET_IMAGE(image, surface_id); + int array_size = __gen_ocl_get_image_depth(surface_id); + return clamp(index, 0, array_size - 1); +} + +#define DECL_READ_IMAGE0(int_clamping_fix, \ image_type, type, suffix, coord_type, n) \ INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, \ const sampler_t sampler, \ coord_type coord) \ { \ GET_IMAGE(cl_image, surface_id); \ + GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); \ if (int_clamping_fix && \ ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && \ ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) \ return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORD(surface_id, sampler, coord), 1); \ + EXPEND_READ_COORD(surface_id, sampler, coord)); \ return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORD(surface_id, sampler, (float)coord), 0);\ + 
EXPEND_READ_COORDF(surface_id, sampler, coord), 0); \ } #define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix, \ @@ -4640,6 +4665,7 @@ int __gen_ocl_get_image_depth(uint surface_id); coord_type coord) \ { \ GET_IMAGE(cl_image, surface_id); \ + GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) \ coord_type tmpCoord = coord; \ if (float_coord_rounding_fix | int_clamping_fix) { \ if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \ @@ -4655,12 +4681,12 @@ int __gen_ocl_get_image_depth(uint surface_id); } else \ intCoord = tmpCoord; \ return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORD1(surface_id, sampler, intCoord), 1);\ + EXPEND_READ_COORDI(surface_id, sampler, intCoord));\ } \ } \ } \ return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\ + EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\ } #define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n) \ @@ -4668,11 +4694,12 @@ int __gen_ocl_get_image_depth(uint surface_id); coord_type coord) \ { \ GET_IMAGE(cl_image, surface_id); \ + GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) \ return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORD(surface_id, \ + EXPEND_READ_COORDF(surface_id, \ CLK_NORMALIZED_COORDS_FALSE \ | CLK_ADDRESS_NONE \ - | CLK_FILTER_NEAREST, (float)coord), 0); \ + | CLK_FILTER_NEAREST, (float)coord), 0); \ } #define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \ @@ -4707,16 +4734,12 @@ int __gen_ocl_get_image_depth(uint surface_id); DECL_WRITE_IMAGE(image_type, type, suffix, int) \ DECL_WRITE_IMAGE(image_type, type, suffix, float) -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 ? 
-1 : coord) +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1 +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1 #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id); #define EXPEND_WRITE_COORD(id, coord, color) id, coord, color - -#define OUT_OF_BOX(coord, surface, normalized) \ - (coord < 0 || \ - ((normalized == 0) \ - && (coord >= __gen_ocl_get_image_width(surface))) \ - || ((normalized != 0) && (coord > 0x1p0))) +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d) #define FIXUP_FLOAT_COORD(tmpCoord) \ { \ @@ -4732,10 +4755,10 @@ DECL_IMAGE(0, image1d_t, float4, f) DECL_IMAGE_INFO_COMMON(image1d_t) #undef EXPEND_READ_COORD -#undef EXPEND_READ_COORD1 +#undef EXPEND_READ_COORDF +#undef EXPEND_READ_COORDI #undef DENORMALIZE_COORD #undef EXPEND_WRITE_COORD -#undef OUT_OF_BOX #undef FIXUP_FLOAT_COORD #undef DECL_IMAGE // End of 1D @@ -4747,20 +4770,14 @@ DECL_IMAGE_INFO_COMMON(image1d_t) DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \ DECL_WRITE_IMAGE(image_type, type, suffix, float ## n) // 2D -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1 -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \ - (int)(coord.s1 < 0 ? -1 : coord.s1) +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1 +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1 +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \ + (int)(coord.s1 < 0 ? 
-1 : coord.s1), 1 #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \ dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color -#define OUT_OF_BOX(coord, surface, normalized) \ - (coord.s0 < 0 || coord.s1 < 0 || \ - ((normalized == 0) \ - && (coord.s0 >= __gen_ocl_get_image_width(surface) \ - || coord.s1 >= __gen_ocl_get_image_height(surface))) \ - || ((normalized != 0) && (coord.s0 > 0x1p0 || coord.s1 > 0x1p0))) - #define FIXUP_FLOAT_COORD(tmpCoord) \ { \ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ @@ -4774,6 +4791,28 @@ DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2) DECL_IMAGE(0, image2d_t, float4, f, 2) // 1D Array +#undef GET_IMAGE_ARRAY_SIZE +#undef EXPEND_READ_COORD +#undef EXPEND_READ_COORDF +#undef EXPEND_READ_COORDI +#undef DENORMALIZE_COORD +#undef EXPEND_WRITE_COORD +#undef FIXUP_FLOAT_COORD + +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2 +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? 
-1 : coord.s0), 0, (int)ai, 2 +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \ + coord_type ai = __gen_compute_array_index(coord.s1, image); + +#define FIXUP_FLOAT_COORD(tmpCoord) \ + { \ + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ + tmpCoord.s0 += -0x1p-9; \ + } + DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2) DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2) DECL_IMAGE(0, image1d_array_t, float4, f, 2) @@ -4799,29 +4838,23 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image) } #undef EXPEND_READ_COORD -#undef EXPEND_READ_COORD1 +#undef EXPEND_READ_COORDI +#undef EXPEND_READ_COORDF #undef DENORMALIZE_COORD #undef EXPEND_WRITE_COORD -#undef OUT_OF_BOX #undef FIXUP_FLOAT_COORD +#undef GET_IMAGE_ARRAY_SIZE // End of 2D and 1D Array // 3D -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2 -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \ - (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2) +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1 +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2 +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \ + (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? 
-1 : coord.s2), 1 #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \ dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \ dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id); #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color -#define OUT_OF_BOX(coord, surface, normalized) \ - (coord.s0 < 0 || coord.s1 < 0 || coord.s2 < 0 || \ - ((normalized == 0) \ - && (coord.s0 >= __gen_ocl_get_image_width(surface) \ - || coord.s1 >= __gen_ocl_get_image_height(surface) \ - || coord.s2 >= __gen_ocl_get_image_depth(surface))) \ - || ((normalized != 0) \ - &&(coord.s0 > 1 || coord.s1 > 1 || coord.s2 > 1))) #define FIXUP_FLOAT_COORD(tmpCoord) \ { \ @@ -4832,6 +4865,7 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image) if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20) \ tmpCoord.s2 += -0x1p-9; \ } +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d) DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4) DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4) @@ -4841,6 +4875,32 @@ DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3) DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3) DECL_IMAGE(0, image3d_t, float4, f, 3) +#undef EXPEND_READ_COORD +#undef EXPEND_READ_COORDF +#undef EXPEND_READ_COORDI +#undef DENORMALIZE_COORD +#undef EXPEND_WRITE_COORD +#undef FIXUP_FLOAT_COORD +#undef GET_IMAGE_ARRAY_SIZE + +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1 +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \ + (int)(coord.s1 < 0 ? 
-1 : coord.s1), (int)ai, 1 +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \ + dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color + +#define FIXUP_FLOAT_COORD(tmpCoord) \ + { \ + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \ + tmpCoord.s0 += -0x1p-9; \ + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \ + tmpCoord.s1 += -0x1p-9; \ + } +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \ + coord_type ai = __gen_compute_array_index(coord.s2, image); + // 2D Array DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4) DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4) @@ -4885,11 +4945,12 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image2d_array_t image) } #undef EXPEND_READ_COORD -#undef EXPEND_READ_COORD1 +#undef EXPEND_READ_COORDF +#undef EXPEND_READ_COORDI #undef DENORMALIZE_COORD #undef EXPEND_WRITE_COORD -#undef OUT_OF_BOX #undef FIXUP_FLOAT_COORD +#undef GET_IMAGE_ARRAY_SIZE // End of 3D and 2D Array #undef DECL_IMAGE @@ -5066,8 +5127,4 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x) #undef OVERLOADABLE #undef INLINE -/* The printf function. 
*/ -int __gen_ocl_printf_stub(const char * format, ...); -#define printf __gen_ocl_printf_stub - #endif /* __GEN_OCL_STDLIB_H__ */ diff --git a/src/cl_api.c b/src/cl_api.c index b17cc52..9e412f6 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context ctx, err = CL_INVALID_VALUE; goto error; } - if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D && + if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D && + image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && + image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY && + image_type != CL_MEM_OBJECT_IMAGE2D && image_type != CL_MEM_OBJECT_IMAGE3D)) { err = CL_INVALID_VALUE; goto error; diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 1bc97ac..41281f2 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) image->intel_fmt, image->image_type, image->w, image->h, image->depth, image->row_pitch, image->tiling); + if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset, + image->intel_fmt, image->image_type, + image->w, image->h, image->depth, + image->row_pitch, image->tiling); } return CL_SUCCESS; } diff --git a/src/cl_device_id.c b/src/cl_device_id.c index af8e90c..578b548 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id device, DECL_FIELD(IMAGE_SUPPORT, image_support) DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args) DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args) + DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size) DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width) DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height) DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width) diff --git a/src/cl_device_id.h b/src/cl_device_id.h index a5449a7..769bfd2 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -51,6 +51,7 @@ struct _cl_device_id { 
cl_uint max_read_image_args; cl_uint max_write_image_args; size_t image2d_max_width; + size_t image_max_array_size; size_t image2d_max_height; size_t image3d_max_width; size_t image3d_max_height; diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index b8bda5e..6d03123 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -41,6 +41,7 @@ .image_support = CL_TRUE, .max_read_image_args = 128, .max_write_image_args = 8, +.image_max_array_size = 2048, .image2d_max_width = 8192, .image2d_max_height = 8192, .image3d_max_width = 8192, diff --git a/src/cl_mem.c b/src/cl_mem.c index 491993e..a7a0f59 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -540,7 +540,7 @@ static cl_mem _cl_mem_new_image(cl_context ctx, cl_mem_flags flags, const cl_image_format *fmt, - const cl_mem_object_type image_type, + const cl_mem_object_type orig_image_type, size_t w, size_t h, size_t depth, @@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx, { cl_int err = CL_SUCCESS; cl_mem mem = NULL; + cl_mem_object_type image_type = orig_image_type; uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0; cl_image_tiling_t tiling = CL_NO_TILE; @@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx, image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY))) DO_IMAGE_ERROR; - if (image_type == CL_MEM_OBJECT_IMAGE1D || - image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + if (image_type == CL_MEM_OBJECT_IMAGE1D) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; @@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx, else if (data && slice_pitch == 0) slice_pitch = pitch; if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; - if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR; + if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR; if (UNLIKELY(!data && 
pitch != 0)) DO_IMAGE_ERROR; @@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx, depth = 1; } else if (image_type == CL_MEM_OBJECT_IMAGE3D || + image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + h = 1; + tiling = CL_NO_TILE; + } else if (cl_driver_get_ver(ctx->drv) != 6) + tiling = cl_get_default_tiling(); + size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; @@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx, slice_pitch = min_slice_pitch; if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR; - if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR; + if (image_type == CL_MEM_OBJECT_IMAGE3D && + (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR + else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; - /* Pick up tiling mode (we do only linear on SNB) */ - if (cl_driver_get_ver(ctx->drv) != 6) - tiling = cl_get_default_tiling(); } else assert(0); @@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx, /* Tiling requires to align both pitch and height */ if (tiling == CL_NO_TILE) { aligned_pitch = w * bpp; - if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || - image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || - image_type == CL_MEM_OBJECT_IMAGE3D) - aligned_h = ALIGN(h, valign); - else - aligned_h = h; + aligned_h = ALIGN(h, valign); } else if (tiling == CL_TILE_X) { aligned_pitch = ALIGN(w * bpp, tilex_w); aligned_h = ALIGN(h, tilex_h); diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 197d388..ab4cb0d 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -91,7 
+91,7 @@ struct intel_gpgpu unsigned long img_bitmap; /* image usage bitmap. */ unsigned int img_index_base; /* base index for image surface.*/ - drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */ + drm_intel_bo *binded_img[max_img_n + 128]; /* all images binded for the call */ unsigned long sampler_bitmap; /* sampler usage bitmap. */ @@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, memset(ss, 0, sizeof(*ss)); ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 - ss->ss0.surface_type = intel_get_surface_type(type); + if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + ss->ss0.surface_type = I965_SURFACE_2D; + else + ss->ss0.surface_type = intel_get_surface_type(type); if (intel_is_surface_array(type)) { ss->ss0.surface_array = 1; ss->ss0.surface_array_spacing = 1; -- 1.8.3.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet