All GEN GPUs can bind to any piece of memory (thanks UMA), and so through
a special ioctl we can map a chunk of page-aligned client memory into
the GPU address space. However, not all GEN are equal. Some have
cache-coherency between the CPU and the GPU, whilst the others are
incoherent and rely on snooping on explicit flushes to push/pull dirty
data. Whereas we can use client buffers as a general replacement for kernel
allocated buffers with LLC (cache coherency), using snooped buffers
behaves differently and so must be used with care.

AMD_pinned_memory supposes that the client memory buffer is suitable
for any general usage (e.g. vertex data, texture data) and so only on
LLC can we offer that extension.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c           | 68 +++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_bufmgr.h           | 11 ++++
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 53 ++++++++++++------
 src/mesa/drivers/dri/i965/intel_extensions.c     |  9 ++++
 src/mesa/drivers/dri/i965/intel_screen.c         | 17 ++++++
 src/mesa/drivers/dri/i965/intel_screen.h         |  1 +
 6 files changed, 141 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 14e91468d1..8f81a8bd4a 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -481,6 +481,72 @@ brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
    return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride, 0);
 }
 
+/*
+ * Wrap the client memory [ptr, ptr+size) in a GPU buffer and mark it cache
+ * coherent (though on non-LLC architectures this requires snooping on
+ * explicit cache flushes). Writes the caller makes through ptr are then
+ * visible to the GPU, exactly as if they had created the buffer and then
+ * persistently mapped it to obtain the pointer. name is stored by pointer,
+ * alignment is recorded in bo->align. Returns the new bo, or NULL on failure.
+ */
+struct brw_bo *
+brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr,
+                     const char *name,
+                     void *ptr,
+                     uint64_t size,
+                     uint64_t alignment)
+{
+   struct brw_bo *bo = calloc(1, sizeof(*bo));
+   if (!bo)
+      return NULL;
+
+   bo->bufmgr = bufmgr;
+   bo->name = name; /* the pointer is stored, the string is not copied */
+   p_atomic_set(&bo->refcount, 1);
+
+   bo->size = size;
+   bo->align = alignment;
+   bo->map_cpu = ptr; /* the client memory itself serves as the CPU map */
+   bo->userptr = true; /* tells bo_free() not to munmap the client memory */
+   bo->reusable = false;
+   bo->cache_coherent = true;
+   bo->idle = true;
+
+   bo->tiling_mode = I915_TILING_NONE;
+   bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+   bo->stride = 0;
+
+   struct drm_i915_gem_userptr arg = {
+      .user_ptr = (uintptr_t)ptr,
+      .user_size = size,
+      .flags = 0,
+   };
+   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) {
+      free(bo); /* no GEM handle was obtained, so only the struct is freed */
+      return NULL;
+   }
+
+   bo->gem_handle = arg.handle;
+
+   /* Check the buffer for validity before we try and use it in a batch */
+   if (drmIoctl(bufmgr->fd,
+                DRM_IOCTL_I915_GEM_SET_DOMAIN,
+                &(struct drm_i915_gem_set_domain){
+                   .handle = bo->gem_handle,
+                   .read_domains = I915_GEM_DOMAIN_CPU,
+                }))
+      goto err_free;
+
+   VG_DEFINED(ptr, size); /* Presume we write to it using the GPU */
+   return bo;
+
+err_free:
+   pthread_mutex_lock(&bufmgr->lock);
+   bo_free(bo); /* invoked with bufmgr->lock held */
+   pthread_mutex_unlock(&bufmgr->lock);
+   return NULL;
+}
+
 /**
  * Returns a brw_bo wrapping the given buffer object handle.
  *
@@ -574,7 +640,7 @@ bo_free(struct brw_bo *bo)
    struct drm_gem_close close;
    int ret;
 
-   if (bo->map_cpu) {
+   if (bo->map_cpu && !bo->userptr) {
       VG_NOACCESS(bo->map_cpu, bo->size);
       drm_munmap(bo->map_cpu, bo->size);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 45819c17c5..16c035f47b 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -143,6 +143,11 @@ struct brw_bo {
     * Boolean of whether this buffer is cache coherent
     */
    bool cache_coherent;
+
+   /**
+    * Boolean of whether this buffer is a userptr
+    */
+   bool userptr:1;
 };
 
 #define BO_ALLOC_FOR_RENDER (1<<0)
@@ -176,6 +181,12 @@ struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr,
                                   uint32_t pitch,
                                   unsigned flags);
 
+struct brw_bo *brw_bo_alloc_userptr(struct brw_bufmgr *bufmgr,
+                                    const char *name,
+                                    void *ptr,
+                                    uint64_t size,
+                                    uint64_t alignment);
+
 /**
  * Allocate a tiled buffer object.
  *
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index ee59116828..2cb123973e 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -74,6 +74,23 @@ mark_buffer_invalid(struct intel_buffer_object *intel_obj)
 
 /** Allocates a new brw_bo to store the data for the buffer object. */
 static void
+mark_new_state(struct brw_context *brw,
+               struct intel_buffer_object *intel_obj)
+{
+   /* The buffer might be bound at any usage recorded in UsageHistory, so
+    * raise the matching BRW_NEW_* bit in NewDriverState for each one. */
+   if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; /* same bit as UBOs */
+   if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER;
+}
+
+/** Allocates a new brw_bo to store the data for the buffer object. */
+static void
 alloc_buffer_object(struct brw_context *brw,
                     struct intel_buffer_object *intel_obj)
 {
@@ -98,17 +115,7 @@ alloc_buffer_object(struct brw_context *brw,
    }
    intel_obj->buffer = brw_bo_alloc(brw->bufmgr, "bufferobj", size, 64);
 
-   /* the buffer might be bound as a uniform buffer, need to update it
-    */
-   if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
-      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
-   if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
-      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
-   if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
-      brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
-   if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
-      brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER;
-
+   mark_new_state(brw, intel_obj);
    mark_buffer_inactive(intel_obj);
    mark_buffer_invalid(intel_obj);
 }
@@ -206,13 +213,25 @@ brw_buffer_data(struct gl_context *ctx,
       release_buffer(intel_obj);
 
    if (size != 0) {
-      alloc_buffer_object(brw, intel_obj);
-      if (!intel_obj->buffer)
-         return false;
-
-      if (data != NULL) {
-        brw_bo_subdata(intel_obj->buffer, 0, size, data);
+      if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
+         intel_obj->buffer =
+            brw_bo_alloc_userptr(brw->bufmgr, "bufferobj(userptr)",
+                                 (void *)data, size, 0);
+         if (!intel_obj->buffer)
+            return false;
+
+         mark_buffer_inactive(intel_obj);
          mark_buffer_valid_data(intel_obj, 0, size);
+         mark_new_state(brw, intel_obj);
+      } else {
+        alloc_buffer_object(brw, intel_obj);
+         if (!intel_obj->buffer)
+            return false;
+
+         if (data != NULL) {
+            brw_bo_subdata(intel_obj->buffer, 0, size, data);
+            mark_buffer_valid_data(intel_obj, 0, size);
+         }
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index b91bbdc8d9..2514712cfa 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -193,6 +193,15 @@ intelInitExtensions(struct gl_context *ctx)
 
       ctx->Extensions.ARB_timer_query = brw->screen->hw_has_timestamp;
 
+      /* Flexibility of using client memory for any buffer (incl. vertex
+       * buffers) rules out the prospect of using snooped buffers, and
+       * using snooped buffers without cognizance is likely to be
+       * detrimental to performance anyway.
+       */
+      ctx->Extensions.AMD_pinned_memory =
+         brw->screen->kernel_features & KERNEL_ALLOWS_USERPTR &&
+         brw->screen->devinfo.has_llc;
+
       /* Only enable this in core profile because other parts of Mesa behave
        * slightly differently when the extension is enabled.
        */
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index ec07cf0acc..3666b65bb6 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1547,6 +1547,19 @@ intel_detect_swizzling(struct intel_screen *screen)
       return true;
 }
 
+static bool
+intel_detect_userptr(struct intel_screen *screen)
+{
+   errno = 0; /* cleared first: the result is read from errno, not the return */
+   drmIoctl(screen->driScrnPriv->fd,
+            DRM_IOCTL_I915_GEM_USERPTR,
+            &(struct drm_i915_gem_userptr){
+               .user_ptr = -4096ULL, /* 4096 bytes below the 2^64 wrap */
+               .user_size = 8192, /* so ptr+size overflows: a deliberately bad range */
+            });
+   return errno == EFAULT; /* presumably only a userptr-aware kernel says EFAULT -- TODO confirm */
+}
+
 static int
 intel_detect_timestamp(struct intel_screen *screen)
 {
@@ -2271,6 +2284,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
       screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST;
    }
 
+   if (intel_detect_userptr(screen)) {
+      screen->kernel_features |= KERNEL_ALLOWS_USERPTR;
+   }
+
    if (!intel_detect_pipelined_so(screen)) {
       /* We can't do anything, so the effective version is 0. */
       screen->cmd_parser_version = 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 41e1dbdd4e..b37c2dc40f 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -81,6 +81,7 @@ struct intel_screen
 #define KERNEL_ALLOWS_COMPUTE_DISPATCH              (1<<4)
 #define KERNEL_ALLOWS_EXEC_CAPTURE                  (1<<5)
 #define KERNEL_ALLOWS_EXEC_BATCH_FIRST              (1<<6)
+#define KERNEL_ALLOWS_USERPTR                       (1<<7)
 
    struct brw_bufmgr *bufmgr;
 
-- 
2.13.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to