On Sun, Aug 10, 2014 at 08:06:57AM +0100, Chris Wilson wrote:
> Move the decision on whether we need to have a mappable object during
> execbuffer to the fore and then reuse that decision by propagating the
> flag through to reservation. As a corollary, before doing the actual
> relocation through the GTT, we can make sure that we do have a GTT
> mapping through which to operate.
> 
> v2: Revamp and resend to ease future patches.
> v3: Refresh patch rationale
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widaw...@intel.com>
> Cc: Daniel Vetter <daniel.vet...@ffwll.ch>

Ok, the secure batch hunk in here looks rather unrelated and imo also a
bit incomplete, so I've dropped it (a sketch of the balanced pin/unpin
shape it is aiming for is at the bottom of this mail). I've also added a
bit of text to the commit message to explain why this patch touches the
map_and_fenceable logic.
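
As an aside for anyone skimming the diff: the core move is to compute the
"does this object need a mappable GTT binding" answer once per object,
record it as a flag bit on the exec entry, and have every later pass (the
reserve loop, the misplaced check) test only that bit, so the answer
cannot drift between passes. A minimal standalone sketch of that shape,
simplified and with invented names rather than the real i915 ones:

/* sketch.c -- illustrative only, not driver code */
#include <stdbool.h>
#include <stdio.h>

#define NEEDS_FENCE (1u << 0)  /* stand-in for EXEC_OBJECT_NEEDS_FENCE */
#define NEEDS_MAP   (1u << 29) /* stand-in for __EXEC_OBJECT_NEEDS_MAP */

struct entry {
	unsigned int flags;
	int relocation_count;
	bool tiled;
};

/* Decided exactly once, while building the ordered list. */
static void classify(struct entry *e, bool has_fenced_gpu_access)
{
	bool need_fence = has_fenced_gpu_access &&
			  (e->flags & NEEDS_FENCE) && e->tiled;

	if (need_fence || e->relocation_count > 0)
		e->flags |= NEEDS_MAP;
}

/* Later passes consult only the recorded bit. */
static bool misplaced(const struct entry *e, bool map_and_fenceable)
{
	return (e->flags & NEEDS_MAP) && !map_and_fenceable;
}

int main(void)
{
	struct entry e = { .flags = NEEDS_FENCE, .relocation_count = 0,
			   .tiled = true };

	classify(&e, true);
	printf("needs map: %d, misplaced when unmappable: %d\n",
	       !!(e->flags & NEEDS_MAP), misplaced(&e, false));
	return 0;
}

(The real predicate is need_fence || need_reloc_mappable(vma), where the
latter also checks i915_is_ggtt(), HAS_LLC() and the CPU write domain; the
sketch only shows the decide-once-and-record pattern.)
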
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c            |  8 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
>  2 files changed, 42 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99250d27668d..6366230c4e32 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>       vma->unbind_vma(vma);
>  
>       list_del_init(&vma->mm_list);
> -     /* Avoid an unnecessary call to unbind on rebind. */
>       if (i915_is_ggtt(vma->vm))
> -             obj->map_and_fenceable = true;
> +             obj->map_and_fenceable = false;
>  
>       drm_mm_remove_node(&vma->node);
>       i915_gem_vma_destroy(vma);
> @@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>                       return 0;
>               }
>       } else if (enable) {
> +             if (WARN_ON(!obj->map_and_fenceable))
> +                     return -EINVAL;
> +
>               reg = i915_find_fence_reg(dev);
>               if (IS_ERR(reg))
>                       return PTR_ERR(reg);
> @@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  
>       obj->fence_reg = I915_FENCE_REG_NONE;
>       obj->madv = I915_MADV_WILLNEED;
> -     /* Avoid an unnecessary call to unbind on the first bind. */
> -     obj->map_and_fenceable = true;
>  
>       i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc4e5b2..8b734d5d16b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -35,6 +35,7 @@
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> +#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
>  #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
>  
>  #define BATCH_OFFSET_BIAS (256*1024)
> @@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
>  }
>  
>  static int
> -need_reloc_mappable(struct i915_vma *vma)
> -{
> -     struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -     return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
> -             i915_is_ggtt(vma->vm);
> -}
> -
> -static int
>  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>                               struct intel_engine_cs *ring,
>                               bool *need_reloc)
> @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>       struct drm_i915_gem_object *obj = vma->obj;
>       struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>       bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -     bool need_fence;
>       uint64_t flags;
>       int ret;
>  
>       flags = 0;
> -
> -     need_fence =
> -             has_fenced_gpu_access &&
> -             entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -             obj->tiling_mode != I915_TILING_NONE;
> -     if (need_fence || need_reloc_mappable(vma))
> +     if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
>               flags |= PIN_MAPPABLE;
> -
>       if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
>               flags |= PIN_GLOBAL;
>       if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  }
>  
>  static bool
> -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
> +need_reloc_mappable(struct i915_vma *vma)
>  {
>       struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -     struct drm_i915_gem_object *obj = vma->obj;
> -     bool need_fence, need_mappable;
>  
> -     need_fence =
> -             has_fenced_gpu_access &&
> -             entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -             obj->tiling_mode != I915_TILING_NONE;
> -     need_mappable = need_fence || need_reloc_mappable(vma);
> +     if (entry->relocation_count == 0)
> +             return false;
>  
> -     WARN_ON((need_mappable || need_fence) &&
> +     if (!i915_is_ggtt(vma->vm))
> +             return false;
> +
> +     /* See also use_cpu_reloc() */
> +     if (HAS_LLC(vma->obj->base.dev))
> +             return false;
> +
> +     if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +             return false;
> +
> +     return true;
> +}
> +
> +static bool
> +eb_vma_misplaced(struct i915_vma *vma)
> +{
> +     struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +     struct drm_i915_gem_object *obj = vma->obj;
> +
> +     WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
>              !i915_is_ggtt(vma->vm));
>  
>       if (entry->alignment &&
>           vma->node.start & (entry->alignment - 1))
>               return true;
>  
> -     if (need_mappable && !obj->map_and_fenceable)
> +     if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>               return true;
>  
>       if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>                       obj->tiling_mode != I915_TILING_NONE;
>               need_mappable = need_fence || need_reloc_mappable(vma);
>  
> -             if (need_mappable)
> +             if (need_mappable) {
> +                     entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>                       list_move(&vma->exec_list, &ordered_vmas);
> -             else
> +             } else
>                       list_move_tail(&vma->exec_list, &ordered_vmas);
>  
>               obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>                       if (!drm_mm_node_allocated(&vma->node))
>                               continue;
>  
> -                     if (eb_vma_misplaced(vma, has_fenced_gpu_access))
> +                     if (eb_vma_misplaced(vma))
>                               ret = i915_vma_unbind(vma);
>                       else
>                               ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
> @@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>       /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>        * batch" bit. Hence we need to pin secure batches into the global gtt.
>        * hsw should have this fixed, but bdw mucks it up again. */
> -     if (flags & I915_DISPATCH_SECURE &&
> -         !batch_obj->has_global_gtt_mapping) {
> -             /* When we have multiple VMs, we'll need to make sure that we
> -              * allocate space first */
> -             struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
> -             BUG_ON(!vma);
> -             vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
> -     }
> +     if (flags & I915_DISPATCH_SECURE) {
> +             ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
> +             if (ret)
> +                     goto err;
>  
> -     if (flags & I915_DISPATCH_SECURE)
>               exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> -     else
> +     } else
>               exec_start += i915_gem_obj_offset(batch_obj, vm);
>  
>       ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
> -                     args, &eb->vmas, batch_obj, exec_start, flags);
> -     if (ret)
> -             goto err;
> +                                        args, &eb->vmas, batch_obj, exec_start, flags);
>  
> +     if (flags & I915_DISPATCH_SECURE)
> +             i915_gem_object_ggtt_unpin(batch_obj);
>  err:
>       /* the request owns the ref now */
>       i915_gem_context_unreference(ctx);
> -- 
> 2.1.0.rc1
> 

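For reference, the shape the secure-batch handling wants is a plain
pin/use/unpin bracket in which every exit path taken after the pin also
drops it. A small compile-and-run sketch, with invented helpers rather
than the actual i915 API:

/* sketch only -- invented names, not driver code */
#include <stdbool.h>
#include <stdio.h>

struct buf { int pin_count; };

static int ggtt_pin(struct buf *b) { b->pin_count++; return 0; }
static void ggtt_unpin(struct buf *b) { b->pin_count--; }
static int submit(struct buf *b) { (void)b; return 0; /* may fail */ }

static int dispatch(struct buf *batch, bool secure)
{
	int ret;

	if (secure) {
		ret = ggtt_pin(batch);	/* take the extra GGTT binding */
		if (ret)
			return ret;	/* nothing to unwind yet */
	}

	ret = submit(batch);		/* success or failure... */

	if (secure)
		ggtt_unpin(batch);	/* ...the pin is dropped either way */

	return ret;
}

int main(void)
{
	struct buf batch = { 0 };

	dispatch(&batch, true);
	printf("pin_count after dispatch: %d (expect 0)\n", batch.pin_count);
	return 0;
}

(Illustrative only; the real driver additionally has to keep the batch
bound while the GPU is still reading it, which the sketch deliberately
ignores.)
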
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch