On non-LLC architectures, where we are primarily reading back the
results of the GPU queries on the CPU, we can improve performance by
using a cacheable mapping of the results. Unfortunately, enabling snooping
makes the writes from the GPU slower, which may adversely affect pipelined
query operations (where the results are used directly by the GPU and not
the CPU).

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenn...@whitecape.org>
Cc: Matt Turner <matts...@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c    | 24 ++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_bufmgr.h    |  2 ++
 src/mesa/drivers/dri/i965/gen6_queryobj.c |  4 +++-
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 5c7647f8bc..d71cef25e3 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -683,6 +683,30 @@ brw_bo_unreference(struct brw_bo *bo)
    }
 }
 
+static bool
+__brw_bo_set_caching(struct brw_bo *bo, int caching)
+{
+   struct drm_i915_gem_caching arg = {
+      .handle = bo->gem_handle,
+      .caching = caching
+   };
+   return drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
+}
+
+void
+brw_bo_set_cache_coherent(struct brw_bo *bo)
+{
+   assert(!bo->external);
+   if (bo->cache_coherent)
+      return;
+
+   if (!__brw_bo_set_caching(bo, I915_CACHING_CACHED))
+      return;
+
+   bo->reusable = false;
+   bo->cache_coherent = true;
+}
+
 static void
 bo_wait_with_stall_warning(struct brw_context *brw,
                            struct brw_bo *bo,
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 9848fe9268..45819c17c5 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -221,6 +221,8 @@ void brw_bo_unreference(struct brw_bo *bo);
 #define MAP_INTERNAL_MASK       (0xff << 24)
 #define MAP_RAW                 (0x01 << 24)
 
+void brw_bo_set_cache_coherent(struct brw_bo *bo);
+
 /**
  * Maps the buffer into userspace.
  *
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index c96f00d8ba..a3b552c6c1 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -225,7 +225,7 @@ gen6_queryobj_get_results(struct gl_context *ctx,
 
    brw_bo_wait_rendering(query->bo);
    uint64_t *results = query->results;
-   if (!query->bo->cache_coherent)
+   if (unlikely(!query->bo->cache_coherent))
       gen_invalidate_range(results, query->bo->size);
 
    switch (query->Base.Target) {
@@ -320,6 +320,8 @@ gen6_alloc_query(struct brw_context *brw, struct brw_query_object *query)
       brw_bo_unreference(query->bo);
 
    query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096);
+   brw_bo_set_cache_coherent(query->bo);
+
    query->results = brw_bo_map(brw, query->bo,
                                MAP_COHERENT | MAP_PERSISTENT |
                                MAP_READ | MAP_ASYNC);
-- 
2.13.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to