Hi Chris,

A few questions/comments throughout. I may be off the mark on some. Please
bear with me as I try to get more familiar with the gem code.

Thanks,
Brad

[ snip ]

On Fri, Jan 24, 2014 at 01:00:19AM -0800, Chris Wilson wrote:
> +static void
> +__i915_mmu_notifier_destroy_worker(struct work_struct *work)
> +{
> +     struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
> +     mmu_notifier_unregister(&mmu->mn, mmu->mm);
> +     kfree(mmu);
> +}
> +
> +static void
> +__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
> +{
> +     hash_del(&mmu->node);
> +     INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
> +     schedule_work(&mmu->work);

The commit message mentions a potential lock inversion as the reason for using
a wq here. A comment with the details might be helpful.
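
Maybe something along these lines (I'm guessing at the exact inversion, so the
wording would need checking against the commit message):

	/* XXX: per the commit message, calling mmu_notifier_unregister()
	 * directly here could deadlock (lock inversion with the notifier
	 * callbacks?), hence the deferral to a worker.
	 */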

> +}
> +
> +static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
> +{
> +     if (++mmu->serial == 0)
> +             mmu->serial = 1;
> +}
> +
> +static void
> +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
> +                   struct i915_mmu_object *mn)
> +{
> +     bool destroy;
> +
> +     spin_lock(&mmu->lock);
> +     interval_tree_remove(&mn->it, &mmu->objects);
> +     destroy = --mmu->count == 0;
> +     __i915_mmu_notifier_update_serial(mmu);
> +     spin_unlock(&mmu->lock);
> +
> +     if (destroy) /* protected against _add() by struct_mutex */
> +             __i915_mmu_notifier_destroy(mmu);

I see that we should hold struct_mutex when this function is called,
but I don't see that we try to get the mutex anywhere on the _add() path.
Have I missed something?
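
If the assumption is that callers of both _add() and _del() hold struct_mutex,
maybe an assert would make that explicit (and catch the _add() path), e.g.:

	lockdep_assert_held(&mn->obj->base.dev->struct_mutex);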

> +}
> +
> +static int
> +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
> +                   struct i915_mmu_object *mn)
> +{
> +     int ret = -EINVAL;
> +
> +     spin_lock(&mmu->lock);
> +     /* Disallow overlapping userptr objects */
> +     if (!interval_tree_iter_first(&mmu->objects,
> +                                   mn->it.start, mn->it.last)) {
> +             interval_tree_insert(&mn->it, &mmu->objects);
> +             mmu->count++;
> +             __i915_mmu_notifier_update_serial(mmu);
> +             ret = 0;
> +     }
> +     spin_unlock(&mmu->lock);
> +
> +     return ret;
> +}
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +     struct i915_mmu_object *mn;
> +
> +     mn = obj->userptr.mn;
> +     if (mn == NULL)
> +             return;
> +
> +     i915_mmu_notifier_del(mn->mmu, mn);
> +     obj->userptr.mn = NULL;
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +                                 unsigned flags)
> +{
> +     struct i915_mmu_notifier *mmu;
> +     struct i915_mmu_object *mn;
> +     int ret;
> +
> +     if (flags & I915_USERPTR_UNSYNCHRONIZED)
> +             return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> +
> +     mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
> +     if (IS_ERR(mmu))
> +             return PTR_ERR(mmu);
> +
> +     mn = kzalloc(sizeof(*mn), GFP_KERNEL);
> +     if (mn == NULL) {
> +             ret = -ENOMEM;
> +             goto destroy_mmu;
> +     }
> +
> +     mn->mmu = mmu;
> +     mn->it.start = obj->userptr.ptr;
> +     mn->it.last = mn->it.start + obj->base.size - 1;
> +     mn->obj = obj;
> +
> +     ret = i915_mmu_notifier_add(mmu, mn);
> +     if (ret)
> +             goto free_mn;
> +
> +     obj->userptr.mn = mn;
> +     return 0;
> +
> +free_mn:
> +     kfree(mn);
> +destroy_mmu:
> +     if (mmu->count == 0)
> +             __i915_mmu_notifier_destroy(mmu);

Other accesses to mmu->count are protected by mmu->lock. Again, I may have
missed something, but I don't immediately see why that isn't required here.
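
If it is needed, maybe something like this (untested sketch, with a local
"bool destroy") would do on the error path:

destroy_mmu:
	spin_lock(&mmu->lock);
	destroy = mmu->count == 0;
	spin_unlock(&mmu->lock);
	if (destroy)
		__i915_mmu_notifier_destroy(mmu);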

> +     return ret;
> +}
> +
> +#else
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +                                 unsigned flags)
> +{
> +     if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> +             return -ENODEV;
> +
> +     if (!capable(CAP_SYS_ADMIN))
> +             return -EPERM;
> +
> +     return 0;
> +}
> +#endif
> +
> +struct get_pages_work {
> +     struct work_struct work;
> +     struct drm_i915_gem_object *obj;
> +     struct task_struct *task;
> +};
> +
> +static void
> +__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> +{
> +     struct get_pages_work *work = container_of(_work, typeof(*work), work);
> +     struct drm_i915_gem_object *obj = work->obj;
> +     struct drm_device *dev = obj->base.dev;
> +     const int num_pages = obj->base.size >> PAGE_SHIFT;
> +     struct page **pvec;
> +     int pinned, ret;
> +
> +     ret = -ENOMEM;
> +     pinned = 0;
> +
> +     pvec = kmalloc(num_pages*sizeof(struct page *),
> +                    GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +     if (pvec == NULL)
> +             pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +     if (pvec != NULL) {
> +             struct mm_struct *mm = obj->userptr.mm;
> +
> +             use_mm(mm);
> +             down_read(&mm->mmap_sem);
> +             while (pinned < num_pages) {
> +                     ret = get_user_pages(work->task, mm,
> +                                          obj->userptr.ptr + pinned * PAGE_SIZE,
> +                                          num_pages - pinned,
> +                                          !obj->userptr.read_only, 0,
> +                                          pvec + pinned, NULL);
> +                     if (ret < 0)
> +                             break;
> +
> +                     pinned += ret;
> +             }
> +             up_read(&mm->mmap_sem);
> +             unuse_mm(mm);
> +     }
> +
> +     mutex_lock(&dev->struct_mutex);
> +     if (obj->userptr.work != &work->work) {
> +             ret = 0;
> +     } else if (pinned == num_pages) {
> +             struct sg_table *st;
> +
> +             st = kmalloc(sizeof(*st), GFP_KERNEL);
> +             if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> +                     kfree(st);
> +                     ret = -ENOMEM;
> +             } else {
> +                     struct scatterlist *sg;
> +                     int n;
> +
> +                     for_each_sg(st->sgl, sg, num_pages, n)
> +                             sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> +                     obj->pages = st;
> +                     list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
> +                     pinned = 0;
> +                     ret = 0;
> +             }
> +     }
> +
> +     obj->userptr.work = ERR_PTR(ret);
> +     drm_gem_object_unreference(&obj->base);
> +     mutex_unlock(&dev->struct_mutex);
> +
> +     release_pages(pvec, pinned, 0);
> +     drm_free_large(pvec);
> +
> +     put_task_struct(work->task);
> +     kfree(work);
> +}
> +
> +static int
> +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> +     const int num_pages = obj->base.size >> PAGE_SHIFT;
> +     struct page **pvec;
> +     int pinned, ret;
> +
> +     /* If userspace should engineer that these pages are replaced in
> +      * the vma between us binding this page into the GTT and completion
> +      * of rendering... Their loss. If they change the mapping of their
> +      * pages they need to create a new bo to point to the new vma.
> +      *
> +      * However, that still leaves open the possibility of the vma
> +      * being copied upon fork. Which falls under the same userspace
> +      * synchronisation issue as a regular bo, except that this time
> +      * the process may not be expecting that a particular piece of
> +      * memory is tied to the GPU.
> +      *
> +      * Fortunately, we can hook into the mmu_notifier in order to
> +      * discard the page references prior to anything nasty happening
> +      * to the vma (discard or cloning) which should prevent the more
> +      * egregious cases from causing harm.
> +      */
> +
> +     pvec = NULL;
> +     pinned = 0;
> +     if (obj->userptr.mm == current->mm) {
> +             pvec = kmalloc(num_pages*sizeof(struct page *),
> +                            GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +             if (pvec == NULL) {
> +                     pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +                     if (pvec == NULL)
> +                             return -ENOMEM;
> +             }
> +
> +             pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> +                                            !obj->userptr.read_only, pvec);
> +     }
> +     if (pinned < num_pages) {
> +             if (pinned < 0) {
> +                     ret = pinned;
> +                     pinned = 0;
> +             } else {
> +                     /* Spawn a worker so that we can acquire the
> +                      * user pages without holding our mutex.
> +                      */
> +                     ret = -EAGAIN;
> +                     if (obj->userptr.work == NULL) {
> +                             struct get_pages_work *work;
> +
> +                             work = kmalloc(sizeof(*work), GFP_KERNEL);
> +                             if (work != NULL) {
> +                                     obj->userptr.work = &work->work;
> +
> +                                     work->obj = obj;
> +                                     drm_gem_object_reference(&obj->base);
> +
> +                                     work->task = current;
> +                                     get_task_struct(work->task);
> +
> +                                     INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> +                                     schedule_work(&work->work);

Any reason to use the system wq instead of the driver wq here?
It doesn't look like it's the usual "takes modeset locks" justification.
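
i.e. is there a reason not to do something like (assuming dev_priv->wq is the
appropriate one here):

	queue_work(to_i915(obj->base.dev)->wq, &work->work);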

> +                             } else
> +                                     ret = -ENOMEM;
> +                     } else {
> +                             if (IS_ERR(obj->userptr.work)) {

} else if (...) { ?
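
i.e. collapsing the nesting to:

			} else if (IS_ERR(obj->userptr.work)) {
				ret = PTR_ERR(obj->userptr.work);
				obj->userptr.work = NULL;
			}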

> +                                     ret = PTR_ERR(obj->userptr.work);
> +                                     obj->userptr.work = NULL;
> +                             }
> +                     }
> +             }
> +     } else {
> +             struct sg_table *st;
> +
> +             st = kmalloc(sizeof(*st), GFP_KERNEL);
> +             if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> +                     kfree(st);
> +                     ret = -ENOMEM;
> +             } else {
> +                     struct scatterlist *sg;
> +                     int n;
> +
> +                     for_each_sg(st->sgl, sg, num_pages, n)
> +                             sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> +                     obj->pages = st;
> +                     obj->userptr.work = NULL;
> +
> +                     pinned = 0;
> +                     ret = 0;
> +             }

This block is almost identical to code in the worker. Would it be worth
extracting the common parts into a helper?
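
Something like this, perhaps (just a sketch, name made up):

static struct sg_table *
i915_pages_to_sg(struct page **pvec, int num_pages)
{
	struct sg_table *st;
	struct scatterlist *sg;
	int n;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return ERR_PTR(-ENOMEM);

	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
		kfree(st);
		return ERR_PTR(-ENOMEM);
	}

	for_each_sg(st->sgl, sg, num_pages, n)
		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);

	return st;
}

Then both callers would just check IS_ERR() and assign obj->pages.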

> +     }
> +
> +     release_pages(pvec, pinned, 0);
> +     drm_free_large(pvec);
> +     return ret;
> +}
> +
> +static void
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +{
> +     struct scatterlist *sg;
> +     int i;
> +
> +     if (obj->madv != I915_MADV_WILLNEED)
> +             obj->dirty = 0;

This is subtly different from the similar code in the standard put_pages() in
that it sets dirty=0 for both DONTNEED and PURGED instead of just DONTNEED
(with a BUG_ON(PURGED)). I don't think we will ever actually truncate userptr
objects, so is there any reason for this to be different?
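
i.e. if it is meant to match the regular path I would have expected something
more like:

	BUG_ON(obj->madv == __I915_MADV_PURGED);
	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;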

> +
> +     for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +             struct page *page = sg_page(sg);
> +
> +             if (obj->dirty)
> +                     set_page_dirty(page);
> +
> +             mark_page_accessed(page);
> +             page_cache_release(page);
> +     }
> +     obj->dirty = 0;
> +
> +     sg_free_table(obj->pages);
> +     kfree(obj->pages);
> +
> +     BUG_ON(obj->userptr.work != NULL);
> +}
> +
> +static void
> +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> +{
> +     i915_gem_userptr_release__mmu_notifier(obj);
> +
> +     if (obj->userptr.mm) {
> +             mmput(obj->userptr.mm);
> +             obj->userptr.mm = NULL;
> +     }
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> +     .get_pages = i915_gem_userptr_get_pages,
> +     .put_pages = i915_gem_userptr_put_pages,
> +     .release = i915_gem_userptr_release,
> +};
> +
> +/**
> + * Creates a new mm object that wraps some normal memory from the process
> + * context - user memory.
> + *
> + * We impose several restrictions upon the memory being mapped
> + * into the GPU.
> + * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
> + * 2. We only allow a bo as large as we could in theory map into the GTT,
> + *    that is we limit the size to the total size of the GTT.
> + * 3. The bo is marked as being snoopable. The backing pages are left
> + *    accessible directly by the CPU, but reads by the GPU may incur the cost
> + *    of a snoop (unless you have an LLC architecture).

Maybe add a 4th restriction here: no overlapping userptr ranges
(i915_mmu_notifier_add() disallows them).

- Brad

> + *
> + * Synchronisation between multiple users and the GPU is left to userspace
> + * through the normal set-domain-ioctl. The kernel will enforce that the
> + * GPU relinquishes the VMA before it is returned back to the system
> + * i.e. upon free(), munmap() or process termination. However, the userspace
> + * malloc() library may not immediately relinquish the VMA after free() and
> + * instead reuse it whilst the GPU is still reading and writing to the VMA.
> + * Caveat emptor.
> + *
> + * Also note, that the object created here is not currently a "first class"
> + * object, in that several ioctls are banned. These are the CPU access
> + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
> + * direct access via your pointer rather than use those ioctls.
> + *
> + * If you think this is a good interface to use to pass GPU memory between
> + * drivers, please use dma-buf instead. In fact, wherever possible use
> + * dma-buf instead.
> + */
> +int
> +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +     struct drm_i915_private *dev_priv = dev->dev_private;
> +     struct drm_i915_gem_userptr *args = data;
> +     struct drm_i915_gem_object *obj;
> +     int ret;
> +     u32 handle;
> +
> +     if (args->flags & ~(I915_USERPTR_READ_ONLY |
> +                         I915_USERPTR_UNSYNCHRONIZED))
> +             return -EINVAL;
> +
> +     if (offset_in_page(args->user_ptr | args->user_size))
> +             return -EINVAL;
> +
> +     if (args->user_size > dev_priv->gtt.base.total)
> +             return -E2BIG;
> +
> +     if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
> +                    (char __user *)(unsigned long)args->user_ptr, args->user_size))
> +             return -EFAULT;
> +
> +     /* Allocate the new object */
> +     obj = i915_gem_object_alloc(dev);
> +     if (obj == NULL)
> +             return -ENOMEM;
> +
> +     drm_gem_private_object_init(dev, &obj->base, args->user_size);
> +     i915_gem_object_init(obj, &i915_gem_userptr_ops);
> +     obj->cache_level = I915_CACHE_LLC;
> +     obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +     obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +
> +     obj->userptr.ptr = args->user_ptr;
> +     obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> +
> +     /* And keep a pointer to the current->mm for resolving the user pages
> +      * at binding. This means that we need to hook into the mmu_notifier
> +      * in order to detect if the mmu is destroyed.
> +      */
> +     ret = -ENOMEM;
> +     if ((obj->userptr.mm = get_task_mm(current)))
> +             ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> +     if (ret == 0)
> +             ret = drm_gem_handle_create(file, &obj->base, &handle);
> +
> +     /* drop reference from allocate - handle holds it now */
> +     drm_gem_object_unreference_unlocked(&obj->base);
> +     if (ret)
> +             return ret;
> +
> +     args->handle = handle;
> +     return 0;
> +}
> +
> +int
> +i915_gem_init_userptr(struct drm_device *dev)
> +{
> +#if defined(CONFIG_MMU_NOTIFIER)
> +     struct drm_i915_private *dev_priv = to_i915(dev);
> +     hash_init(dev_priv->mmu_notifiers);
> +#endif
> +     return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index ae8cf61b8ce3..cce9f559e3d7 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>               err_puts(m, tiling_flag(err->tiling));
>               err_puts(m, dirty_flag(err->dirty));
>               err_puts(m, purgeable_flag(err->purgeable));
> +             err_puts(m, err->userptr ? " userptr" : "");
>               err_puts(m, err->ring != -1 ? " " : "");
>               err_puts(m, ring_str(err->ring));
>               err_puts(m, i915_cache_level_str(err->cache_level));
> @@ -584,6 +585,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>       err->tiling = obj->tiling_mode;
>       err->dirty = obj->dirty;
>       err->purgeable = obj->madv != I915_MADV_WILLNEED;
> +     err->userptr = obj->userptr.mm != 0;
>       err->ring = obj->ring ? obj->ring->id : -1;
>       err->cache_level = obj->cache_level;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 37c8073a8246..6c145a0be250 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_REG_READ            0x31
>  #define DRM_I915_GET_RESET_STATS     0x32
>  #define DRM_I915_GEM_CREATE2         0x33
> +#define DRM_I915_GEM_USERPTR         0x34
>  
>  #define DRM_IOCTL_I915_INIT          DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH         DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY   DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ                      DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>  #define DRM_IOCTL_I915_GET_RESET_STATS               DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR                   DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
>       __u32 pad;
>  };
>  
> +struct drm_i915_gem_userptr {
> +     __u64 user_ptr;
> +     __u64 user_size;
> +     __u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +     /**
> +      * Returned handle for the object.
> +      *
> +      * Object handles are nonzero.
> +      */
> +     __u32 handle;
> +};
> +
>  #endif /* _UAPI_I915_DRM_H_ */
> -- 
> 1.8.5.3
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
