All GEN GPUs can bind to any piece of memory (thanks to UMA), and so through a special ioctl we can map a chunk of page-aligned client memory into the GPU address space. However, not all GENs are equal. Some have cache coherency between the CPU and the GPU, whilst the others are incoherent and rely on snooping on explicit flushes to push/pull dirty data. Whereas with LLC (cache coherency) we can use client buffers as a general replacement for kernel-allocated buffers, snooped buffers behave differently and so must be used with care.
AMD_pinned_memory supposes that the client memory buffer is suitable for any general usage (e.g. vertex data, texture data) and so only on LLC can we offer that extension. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 68 +++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_bufmgr.h | 11 ++++ src/mesa/drivers/dri/i965/intel_buffer_objects.c | 53 ++++++++++++------ src/mesa/drivers/dri/i965/intel_extensions.c | 9 ++++ src/mesa/drivers/dri/i965/intel_screen.c | 17 ++++++ src/mesa/drivers/dri/i965/intel_screen.h | 1 + 6 files changed, 141 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 14e91468d1..8f81a8bd4a 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -481,6 +481,72 @@ brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name, return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride, 0); } +/* + * Wrap the chunk of client memory given by ptr+size inside a GPU + * buffer, and make it cache coherent (though on non-LLC architectures + * this requires snooping on explicit cache flushes). This allows the + * caller to write into the memory chunk and for those writes to be + * visible on the GPU (exactly as if they create the buffer and then + * persistently mapped it to obtain the pointer). 
+ */ +struct brw_bo * +brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr, + const char *name, + void *ptr, + uint64_t size, + uint64_t alignment) +{ + struct brw_bo *bo = calloc(1, sizeof(*bo)); + if (!bo) + return NULL; + + bo->bufmgr = bufmgr; + bo->name = name; + p_atomic_set(&bo->refcount, 1); + + bo->size = size; + bo->align = alignment; + bo->map_cpu = ptr; + bo->userptr = true; + bo->reusable = false; + bo->cache_coherent = true; + bo->idle = true; + + bo->tiling_mode = I915_TILING_NONE; + bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo->stride = 0; + + struct drm_i915_gem_userptr arg = { + .user_ptr = (uintptr_t)ptr, + .user_size = size, + .flags = 0, + }; + if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) { + free(bo); + return NULL; + } + + bo->gem_handle = arg.handle; + + /* Check the buffer for validity before we try and use it in a batch */ + if (drmIoctl(bufmgr->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &(struct drm_i915_gem_set_domain){ + .handle = bo->gem_handle, + .read_domains = I915_GEM_DOMAIN_CPU, + })) + goto err_free; + + VG_DEFINED(ptr, size); /* Presume we write to it using the GPU */ + return bo; + +err_free: + pthread_mutex_lock(&bufmgr->lock); + bo_free(bo); + pthread_mutex_unlock(&bufmgr->lock); + return NULL; +} + /** * Returns a brw_bo wrapping the given buffer object handle. 
* @@ -574,7 +640,7 @@ bo_free(struct brw_bo *bo) struct drm_gem_close close; int ret; - if (bo->map_cpu) { + if (bo->map_cpu && !bo->userptr) { VG_NOACCESS(bo->map_cpu, bo->size); drm_munmap(bo->map_cpu, bo->size); } diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h index 45819c17c5..16c035f47b 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h @@ -143,6 +143,11 @@ struct brw_bo { * Boolean of whether this buffer is cache coherent */ bool cache_coherent; + + /** + * Boolean of whether this buffer is a userptr + */ + bool userptr:1; }; #define BO_ALLOC_FOR_RENDER (1<<0) @@ -176,6 +181,12 @@ struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, uint32_t pitch, unsigned flags); +struct brw_bo *brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr, + const char *name, + void *ptr, + uint64_t size, + uint64_t alignment); + /** * Allocate a tiled buffer object. * diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index ee59116828..2cb123973e 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -74,6 +74,23 @@ mark_buffer_invalid(struct intel_buffer_object *intel_obj) /** Allocates a new brw_bo to store the data for the buffer object. 
*/ static void +mark_new_state(struct brw_context *brw, + struct intel_buffer_object *intel_obj) +{ + /* the buffer might be bound as a uniform buffer, need to update it + */ + if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER; +} + +/** Allocates a new brw_bo to store the data for the buffer object. */ +static void alloc_buffer_object(struct brw_context *brw, struct intel_buffer_object *intel_obj) { @@ -98,17 +115,7 @@ alloc_buffer_object(struct brw_context *brw, } intel_obj->buffer = brw_bo_alloc(brw->bufmgr, "bufferobj", size, 64); - /* the buffer might be bound as a uniform buffer, need to update it - */ - if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER; - + mark_new_state(brw, intel_obj); mark_buffer_inactive(intel_obj); mark_buffer_invalid(intel_obj); } @@ -206,13 +213,25 @@ brw_buffer_data(struct gl_context *ctx, release_buffer(intel_obj); if (size != 0) { - alloc_buffer_object(brw, intel_obj); - if (!intel_obj->buffer) - return false; - - if (data != NULL) { - brw_bo_subdata(intel_obj->buffer, 0, size, data); + if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) { + intel_obj->buffer = + brw_bo_alloc_userptr(brw->bufmgr, "bufferobj(userptr)", + (void 
*)data, size, 0); + if (!intel_obj->buffer) + return false; + + mark_buffer_inactive(intel_obj); mark_buffer_valid_data(intel_obj, 0, size); + mark_new_state(brw, intel_obj); + } else { + alloc_buffer_object(brw, intel_obj); + if (!intel_obj->buffer) + return false; + + if (data != NULL) { + brw_bo_subdata(intel_obj->buffer, 0, size, data); + mark_buffer_valid_data(intel_obj, 0, size); + } } } diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index b91bbdc8d9..2514712cfa 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -193,6 +193,15 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_timer_query = brw->screen->hw_has_timestamp; + /* Flexibility of using client memory for any buffer (incl. vertex + * buffers) rules out the prospect of using snooped buffers, and + * using snooped buffers without cogniscience is likely to be + * detrimental to performance anyway. + */ + ctx->Extensions.AMD_pinned_memory = + brw->screen->kernel_features & KERNEL_ALLOWS_USERPTR && + brw->screen->devinfo.has_llc; + /* Only enable this in core profile because other parts of Mesa behave * slightly differently when the extension is enabled. 
*/ diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index ec07cf0acc..3666b65bb6 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1547,6 +1547,19 @@ intel_detect_swizzling(struct intel_screen *screen) return true; } +static bool +intel_detect_userptr(struct intel_screen *screen) +{ + errno = 0; + drmIoctl(screen->driScrnPriv->fd, + DRM_IOCTL_I915_GEM_USERPTR, + &(struct drm_i915_gem_userptr){ + .user_ptr = -4096ULL, + .user_size = 8192, + }); + return errno == EFAULT; +} + static int intel_detect_timestamp(struct intel_screen *screen) { @@ -2271,6 +2284,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen) screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST; } + if (intel_detect_userptr(screen)) { + screen->kernel_features |= KERNEL_ALLOWS_USERPTR; + } + if (!intel_detect_pipelined_so(screen)) { /* We can't do anything, so the effective version is 0. */ screen->cmd_parser_version = 0; diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 41e1dbdd4e..b37c2dc40f 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -81,6 +81,7 @@ struct intel_screen #define KERNEL_ALLOWS_COMPUTE_DISPATCH (1<<4) #define KERNEL_ALLOWS_EXEC_CAPTURE (1<<5) #define KERNEL_ALLOWS_EXEC_BATCH_FIRST (1<<6) +#define KERNEL_ALLOWS_USERPTR (1<<7) struct brw_bufmgr *bufmgr; -- 2.13.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev