Move the decision on whether we need to have a mappable object during
execbuffer to the fore and then reuse that decision by propagating the
flag through to reservation. As a corollary, before doing the actual
relocation through the GTT, we can make sure that we do have a GTT
mapping through which to operate.

v2: Revamp and resend to ease future patches.
v3: Refresh patch rationale

References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widaw...@intel.com>
Cc: Daniel Vetter <daniel.vet...@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |  8 +--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
 2 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99250d27668d..6366230c4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
        vma->unbind_vma(vma);
 
        list_del_init(&vma->mm_list);
-       /* Avoid an unnecessary call to unbind on rebind. */
        if (i915_is_ggtt(vma->vm))
-               obj->map_and_fenceable = true;
+               obj->map_and_fenceable = false;
 
        drm_mm_remove_node(&vma->node);
        i915_gem_vma_destroy(vma);
@@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
                        return 0;
                }
        } else if (enable) {
+               if (WARN_ON(!obj->map_and_fenceable))
+                       return -EINVAL;
+
                reg = i915_find_fence_reg(dev);
                if (IS_ERR(reg))
                        return PTR_ERR(reg);
@@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
        obj->fence_reg = I915_FENCE_REG_NONE;
        obj->madv = I915_MADV_WILLNEED;
-       /* Avoid an unnecessary call to unbind on the first bind. */
-       obj->map_and_fenceable = true;
 
        i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc4e5b2..8b734d5d16b4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 }
 
 static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-       struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-       return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-               i915_is_ggtt(vma->vm);
-}
-
-static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
                                struct intel_engine_cs *ring,
                                bool *need_reloc)
@@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
        struct drm_i915_gem_object *obj = vma->obj;
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-       bool need_fence;
        uint64_t flags;
        int ret;
 
        flags = 0;
-
-       need_fence =
-               has_fenced_gpu_access &&
-               entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-               obj->tiling_mode != I915_TILING_NONE;
-       if (need_fence || need_reloc_mappable(vma))
+       if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
                flags |= PIN_MAPPABLE;
-
        if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
                flags |= PIN_GLOBAL;
        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 }
 
 static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
 {
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-       struct drm_i915_gem_object *obj = vma->obj;
-       bool need_fence, need_mappable;
 
-       need_fence =
-               has_fenced_gpu_access &&
-               entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-               obj->tiling_mode != I915_TILING_NONE;
-       need_mappable = need_fence || need_reloc_mappable(vma);
+       if (entry->relocation_count == 0)
+               return false;
 
-       WARN_ON((need_mappable || need_fence) &&
+       if (!i915_is_ggtt(vma->vm))
+               return false;
+
+       /* See also use_cpu_reloc() */
+       if (HAS_LLC(vma->obj->base.dev))
+               return false;
+
+       if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+               return false;
+
+       return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+       struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+       struct drm_i915_gem_object *obj = vma->obj;
+
+       WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
               !i915_is_ggtt(vma->vm));
 
        if (entry->alignment &&
            vma->node.start & (entry->alignment - 1))
                return true;
 
-       if (need_mappable && !obj->map_and_fenceable)
+       if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
                return true;
 
        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
                        obj->tiling_mode != I915_TILING_NONE;
                need_mappable = need_fence || need_reloc_mappable(vma);
 
-               if (need_mappable)
+               if (need_mappable) {
+                       entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
                        list_move(&vma->exec_list, &ordered_vmas);
-               else
+               } else
                        list_move_tail(&vma->exec_list, &ordered_vmas);
 
                obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & 
~I915_GEM_DOMAIN_COMMAND;
@@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
                        if (!drm_mm_node_allocated(&vma->node))
                                continue;
 
-                       if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+                       if (eb_vma_misplaced(vma))
                                ret = i915_vma_unbind(vma);
                        else
                                ret = i915_gem_execbuffer_reserve_vma(vma, 
ring, need_relocs);
@@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
        /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
         * batch" bit. Hence we need to pin secure batches into the global gtt.
         * hsw should have this fixed, but bdw mucks it up again. */
-       if (flags & I915_DISPATCH_SECURE &&
-           !batch_obj->has_global_gtt_mapping) {
-               /* When we have multiple VMs, we'll need to make sure that we
-                * allocate space first */
-               struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-               BUG_ON(!vma);
-               vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-       }
+       if (flags & I915_DISPATCH_SECURE) {
+               ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+               if (ret)
+                       goto err;
 
-       if (flags & I915_DISPATCH_SECURE)
                exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-       else
+       } else
                exec_start += i915_gem_obj_offset(batch_obj, vm);
 
        ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
-                       args, &eb->vmas, batch_obj, exec_start, flags);
-       if (ret)
-               goto err;
+                                          args, &eb->vmas, batch_obj, 
exec_start, flags);
 
+       if (flags & I915_DISPATCH_SECURE)
+               i915_gem_object_ggtt_unpin(batch_obj);
 err:
        /* the request owns the ref now */
        i915_gem_context_unreference(ctx);
-- 
2.1.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to