With a fence, we only need to insert a memory barrier around the actual
fence alteration for CPU accesses through the GTT. Performing the
barrier in flush-fence was inserting unnecessary and expensive barriers
for never fenced objects.

Note removing the barriers from flush-fence, which was effectively a
barrier before every direct access through the GTT, revealed that we
where missing a barrier before the first access through the GTT. Lack of
that barrier was sufficient to cause GPU hangs.

v2: Add a couple more comments to explain the new barriers

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vet...@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c |   40 +++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 05ff790..3c4577b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2771,9 +2771,22 @@ static void i830_write_fence_reg(struct drm_device *dev, 
int reg,
        POSTING_READ(FENCE_REG_830_0 + reg * 4);
 }
 
+inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
+{
+       return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+}
+
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /* Ensure that all CPU reads are completed before installing a fence
+        * and all writes before removing the fence.
+        */
+       if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
+               mb();
+
        switch (INTEL_INFO(dev)->gen) {
        case 7:
        case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
@@ -2783,6 +2796,12 @@ static void i915_gem_write_fence(struct drm_device *dev, 
int reg,
        case 2: i830_write_fence_reg(dev, reg, obj); break;
        default: break;
        }
+
+       /* And similarly be paranoid that no direct access to this region
+        * is reordered to before the fence is installed.
+        */
+       if (i915_gem_object_needs_mb(obj))
+               mb();
 }
 
 static inline int fence_number(struct drm_i915_private *dev_priv,
@@ -2812,7 +2831,7 @@ static void i915_gem_object_update_fence(struct 
drm_i915_gem_object *obj,
 }
 
 static int
-i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
+i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 {
        if (obj->last_fenced_seqno) {
                int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
@@ -2822,12 +2841,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object 
*obj)
                obj->last_fenced_seqno = 0;
        }
 
-       /* Ensure that all CPU reads are completed before installing a fence
-        * and all writes before removing the fence.
-        */
-       if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
-               mb();
-
        obj->fenced_gpu_access = false;
        return 0;
 }
@@ -2838,7 +2851,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
        int ret;
 
-       ret = i915_gem_object_flush_fence(obj);
+       ret = i915_gem_object_wait_fence(obj);
        if (ret)
                return ret;
 
@@ -2912,7 +2925,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
         * will need to serialise the write to the associated fence register?
         */
        if (obj->fence_dirty) {
-               ret = i915_gem_object_flush_fence(obj);
+               ret = i915_gem_object_wait_fence(obj);
                if (ret)
                        return ret;
        }
@@ -2933,7 +2946,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
                if (reg->obj) {
                        struct drm_i915_gem_object *old = reg->obj;
 
-                       ret = i915_gem_object_flush_fence(old);
+                       ret = i915_gem_object_wait_fence(old);
                        if (ret)
                                return ret;
 
@@ -3244,6 +3257,13 @@ i915_gem_object_set_to_gtt_domain(struct 
drm_i915_gem_object *obj, bool write)
 
        i915_gem_object_flush_cpu_write_domain(obj);
 
+       /* Serialise direct access to this object with the barriers for
+        * coherent writes from the GPU, by effectively invalidating the
+        * GTT domain upon first access.
+        */
+       if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+               mb();
+
        old_write_domain = obj->base.write_domain;
        old_read_domains = obj->base.read_domains;
 
-- 
1.7.10.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to