The current limitation is that both host_ptr and the buffer size must be page aligned. Relax the host_ptr requirement to cache-line-size (64-byte) alignment, and remove the restriction on the size entirely.
Signed-off-by: Guo Yejun <yejun....@intel.com> --- src/cl_command_queue.c | 8 ++++++-- src/cl_mem.c | 17 +++++++++++++++-- src/cl_mem.h | 1 + 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 89afa07..f843548 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -133,6 +133,10 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) int id = k->images[i].arg_idx; struct _cl_mem_image *image; assert(interp_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE); + + //currently, user ptr is not supported for cl image, so offset should be always zero + assert(k->args[id].mem->offset == 0); + image = cl_mem_image(k->args[id].mem); set_image_info(k->curbe, &k->images[i], image); cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset, @@ -166,9 +170,9 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i); if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) { struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem; - cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); } else { - cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); } } diff --git a/src/cl_mem.c b/src/cl_mem.c index 3225fd2..36ef2ce 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -266,6 +266,7 @@ cl_mem_allocate(enum cl_mem_type type, mem->magic = CL_MAGIC_MEM_HEADER; mem->flags = flags; mem->is_userptr = 0; + mem->offset = 0; if (sz != 0) 
{ /* Pinning will require stricter alignment rules */ @@ -279,15 +280,21 @@ cl_mem_allocate(enum cl_mem_type type, #ifdef HAS_USERPTR if (ctx->device->host_unified_memory) { int page_size = getpagesize(); + int cacheline_size = 0; + cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL); + /* currently only cl buf is supported, will add cl image support later */ if (type == CL_MEM_BUFFER_TYPE) { if (flags & CL_MEM_USE_HOST_PTR) { assert(host_ptr != NULL); /* userptr not support tiling */ if (!is_tiled) { - if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) { + if (ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr) { + void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1))); + mem->offset = host_ptr - aligned_host_ptr; mem->is_userptr = 1; - mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0); + size_t aligned_sz = ALIGN((mem->offset + sz), page_size); + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0); } } } @@ -514,6 +521,8 @@ cl_mem_new_sub_buffer(cl_mem buffer, mem->ref_n = 1; mem->magic = CL_MAGIC_MEM_HEADER; mem->flags = flags; + mem->offset = buffer->offset; + mem->is_userptr = buffer->is_userptr; sub_buf->parent = (struct _cl_mem_buffer*)buffer; cl_mem_add_ref(buffer); @@ -1853,6 +1862,10 @@ cl_mem_unmap_gtt(cl_mem mem) LOCAL void* cl_mem_map_auto(cl_mem mem, int write) { + //if mem is not created from userptr, the offset should be always zero. + if (!mem->is_userptr) + assert(mem->offset == 0); + if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE) return cl_mem_map_gtt(mem); else { diff --git a/src/cl_mem.h b/src/cl_mem.h index fd50220..e027f15 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -94,6 +94,7 @@ typedef struct _cl_mem { uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. 
*/ cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ + size_t offset; /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/ } _cl_mem; struct _cl_mem_image { -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet