On Thu, May 15, 2014 at 01:17:12PM +0100, Chris Wilson wrote:
> This is pure evil. Userspace, I'm looking at you SNA, repacks batch
> buffers on the fly after generation as they are being passed to the
> kernel for execution. These batches also contain self-referenced
> relocations as a single buffer encompasses the state commands, kernels,
> vertices and sampler. During generation the buffers are placed at known
> offsets within the full batch, and then the relocation deltas (as passed
> to the kernel) are tweaked as the batch is repacked into a smaller buffer.
> This means that userspace is passing negative relocations deltas, which
> subsequently wrap to large values if the batch is at a low address. The
> GPU hangs when it then tries to use the large value as a base for its
> address offsets, rather than wrapping back to the real value (as one
> would hope). As the GPU uses positive offsets from the base, we can
> treat the relocation address as the minimum address read by the GPU.
> For the upper bound, we trust that userspace will not read beyond the
> end of the buffer.
> 
> This fixes a GPU hang when it tries to use an address (relocation +
> offset) greater than the GTT size. The issue would occur quite easily
> with full-ppgtt as each fd gets its own VM space, so low offsets would
> often be handed out. However, with the rearrangement of the low GTT due
> to capturing the BIOS framebuffer, it is already affecting kernels 3.14
> onwards. I think only IVB+ is susceptible to this bug, but the workaround
> should only kick in rarely, so it seems sensible to always apply it.
> 
> v2: Apply the workaround for LUT-based execbuffers as well and only for
> IVB+ as my SNB machine seems to be unaffected.
> 
> Testcase: igt/gem_bad_reloc
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: sta...@vger.kernel.org

Do we need to fix this in the kernel? Userspace supplying relocs that
kinda don't work smells fishy ...
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c            | 15 ++++-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 91 
> ++++++++++++++++++++++++++++--
>  2 files changed, 97 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1a271ee8dc22..ae751b73a087 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3626,8 +3626,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
> *obj,
>       struct drm_device *dev = obj->base.dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       u32 size, fence_size, fence_alignment, unfenced_alignment;
> -     size_t gtt_max =
> +     unsigned long gtt_max =
>               flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
> +     unsigned long start = alignment;
>       struct i915_vma *vma;
>       int ret;
>  
> @@ -3649,6 +3650,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
> *obj,
>               DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
>               return ERR_PTR(-EINVAL);
>       }
> +     if (alignment >= gtt_max) {
> +             DRM_DEBUG("Alignment larger than the aperture: alignment=%d >= 
> %s aperture=%lu\n",
> +                       alignment,
> +                       flags & PIN_MAPPABLE ? "mappable" : "total",
> +                       gtt_max);
> +             return ERR_PTR(-EINVAL);
> +     }
>  
>       size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
>  
> @@ -3656,7 +3664,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
> *obj,
>        * before evicting everything in a vain attempt to find space.
>        */
>       if (obj->base.size > gtt_max) {
> -             DRM_DEBUG("Attempting to bind an object larger than the 
> aperture: object=%zd > %s aperture=%zu\n",
> +             DRM_DEBUG("Attempting to bind an object larger than the 
> aperture: object=%zd > %s aperture=%lu\n",
>                         obj->base.size,
>                         flags & PIN_MAPPABLE ? "mappable" : "total",
>                         gtt_max);
> @@ -3692,7 +3700,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
> *obj,
>  search_free:
>               ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
>                                                         size, alignment,
> -                                                       obj->cache_level, 0, 
> gtt_max,
> +                                                       obj->cache_level,
> +                                                       start, gtt_max,
>                                                         DRM_MM_SEARCH_DEFAULT,
>                                                         
> DRM_MM_CREATE_DEFAULT);
>               if (ret) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e94aa365ae40..d37b54862d37 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -166,14 +166,12 @@ eb_lookup_vmas(struct eb_vmas *eb,
>               list_del_init(&obj->obj_exec_link);
>  
>               vma->exec_entry = &exec[i];
> -             if (eb->and < 0) {
> +             vma->exec_handle = args->flags & I915_EXEC_HANDLE_LUT ? i : 
> exec[i].handle;
> +             if (eb->and < 0)
>                       eb->lut[i] = vma;
> -             } else {
> -                     uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? 
> i : exec[i].handle;
> -                     vma->exec_handle = handle;
> +             else
>                       hlist_add_head(&vma->exec_node,
> -                                    &eb->buckets[handle & eb->and]);
> -             }
> +                                    &eb->buckets[vma->exec_handle & 
> eb->and]);
>               ++i;
>       }
>  
> @@ -261,6 +259,19 @@ static inline int use_cpu_reloc(struct 
> drm_i915_gem_object *obj)
>               obj->cache_level != I915_CACHE_NONE);
>  }
>  
> +static bool invalid_offset(struct drm_device *dev, uint64_t offset)
> +{
> +     const int gen = INTEL_INFO(dev)->gen;
> +
> +     if (gen < 7)
> +             return false;
> +
> +     if (gen < 8)
> +             offset = lower_32_bits(offset);
> +
> +     return offset >= to_i915(dev)->gtt.base.total;
> +}
> +
>  static int
>  relocate_entry_cpu(struct drm_i915_gem_object *obj,
>                  struct drm_i915_gem_relocation_entry *reloc,
> @@ -272,6 +283,9 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
>       char *vaddr;
>       int ret;
>  
> +     if (invalid_offset(dev, delta))
> +             return -EFAULT;
> +
>       ret = i915_gem_object_set_to_cpu_domain(obj, true);
>       if (ret)
>               return ret;
> @@ -309,6 +323,9 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
>       void __iomem *reloc_page;
>       int ret;
>  
> +     if (invalid_offset(dev, delta))
> +             return -EFAULT;
> +
>       ret = i915_gem_object_set_to_gtt_domain(obj, true);
>       if (ret)
>               return ret;
> @@ -702,6 +719,62 @@ err:
>  }
>  
>  static int
> +i915_gem_execbuffer_relocate_check_slow(struct i915_vma *vma,
> +                                     struct drm_i915_gem_relocation_entry 
> *relocs,
> +                                     int total)
> +{
> +     struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +     struct drm_i915_gem_object *obj = vma->obj;
> +     int32_t min = 0;
> +     int i, ret;
> +
> +     /* This is pure evil. Userspace, I'm looking at you SNA, repacks
> +      * batch buffers on the fly after generation and before
> +      * being passed to the kernel for execution. These batches also
> +      * contain self-referenced relocations as a single buffer encompasses
> +      * the state commands, kernels, vertices and sampler. During
> +      * generation the buffers are placed at known offsets within the full
> +      * batch, and then the relocation deltas (as passed to the kernel)
> +      * are tweaked as the batch is repacked into a smaller buffer.
> +      * This means that userspace is passing negative relocations deltas,
> +      * which subsequently wrap to large values. The GPU hangs when it
> +      * then tries to use the large value as a base for the address offset,
> +      * rather than wrapping back to the real value (as one would hope).
> +      */
> +     for (i = 0; i < total; i++) {
> +             struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
> +             int32_t sdelta;
> +
> +             if (reloc->target_handle != vma->exec_handle)
> +                     continue;
> +
> +             sdelta = reloc->delta;
> +             if (sdelta < min)
> +                     min = sdelta;
> +     }
> +     if (min < 0) {
> +             if (drm_mm_node_allocated(&vma->node) &&
> +                 invalid_offset(obj->base.dev, vma->node.start + 
> (uint32_t)min)) {
> +                     ret = i915_vma_unbind(vma);
> +                     if (ret)
> +                             return ret;
> +             }
> +             if (!drm_mm_node_allocated(&vma->node)) {
> +                     if (entry->alignment == 0)
> +                             entry->alignment =
> +                                     
> i915_gem_get_gtt_alignment(obj->base.dev,
> +                                                                
> obj->base.size,
> +                                                                
> obj->tiling_mode,
> +                                                                entry->flags 
> & __EXEC_OBJECT_NEEDS_MAP);
> +                     while (entry->alignment < -min)
> +                             entry->alignment <<= 1;
> +             }
> +     }
> +
> +     return 0;
> +}
> +
> +static int
>  i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
>                                 struct drm_i915_gem_execbuffer2 *args,
>                                 struct drm_file *file,
> @@ -792,6 +865,12 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
>       if (ret)
>               goto err;
>  
> +     list_for_each_entry(vma, &eb->vmas, exec_list) {
> +             ret = i915_gem_execbuffer_relocate_check_slow(vma, reloc, 
> total);
> +             if (ret)
> +                     goto err;
> +     }
> +
>       need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
>       ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
>       if (ret)
> -- 
> 2.0.0.rc2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to