userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start

Tvrtko Ursulin Mon, 05 Nov 2018 08:45:46 -0800


On 02/11/2018 16:12, Chris Wilson wrote:

Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
notifiers") we have been able to report failure from
mmu_invalidate_range_start which allows us to use a trylock on the
struct_mutex to avoid potential recursion and report -EBUSY instead.
Furthermore, this allows us to pull the work into the main callback and
avoid the sleight-of-hand in using a workqueue to avoid lockdep.


However, not all paths to mmu_invalidate_range_start are prepared to
handle failure, so instead of reporting the recursion, deal with it.

Judging by the code below, non-blockable paths can handle failure but blockable cannot? Right, now that I read the invalidate_range_start API docs, that seems to be the case. So that sounds like blockable brings us marginal benefits, if any, on the design level. Which is why, I suppose, this patch looks quite big. Let's see...


Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu 
notifiers")
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
  drivers/gpu/drm/i915/i915_drv.h         |   4 +-
  drivers/gpu/drm/i915/i915_gem.c         |  18 +-
  drivers/gpu/drm/i915/i915_gem_object.h  |   7 +
  drivers/gpu/drm/i915/i915_gem_userptr.c | 217 +++++++++++-------------
  4 files changed, 120 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2a88a7eb871b..1056b12c3bc8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3073,8 +3073,8 @@ enum i915_mm_subclass { /* lockdep subclass for 
obj->mm.lock */
        I915_MM_SHRINKER
  };

-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,

-                                enum i915_mm_subclass subclass);
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+                               enum i915_mm_subclass subclass);
  void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);

enum i915_map_type {

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 93d09282710d..9a8af9454a53 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2429,8 +2429,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object 
*obj)
        struct sg_table *pages;

pages = fetch_and_zero(&obj->mm.pages);

-       if (!pages)
-               return NULL;
+       if (IS_ERR_OR_NULL(pages))
+               return pages; >
        spin_lock(&i915->mm.obj_lock);
        list_del(&obj->mm.link);
@@ -2454,17 +2454,16 @@ __i915_gem_object_unset_pages(struct 
drm_i915_gem_object *obj)
        return pages;
  }

-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,

-                                enum i915_mm_subclass subclass)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+                               enum i915_mm_subclass subclass)
  {
        struct sg_table *pages;
+       int ret = -EBUSY;

if (i915_gem_object_has_pinned_pages(obj))

-               return;
+               return -EBUSY;

GEM_BUG_ON(obj->bind_count);

-       if (!i915_gem_object_has_pages(obj))
-               return;


Unrelated to this patch?

/* May be called by shrinker from within get_pages() (on another bo) */

        mutex_lock_nested(&obj->mm.lock, subclass);
@@ -2477,11 +2476,16 @@ void __i915_gem_object_put_pages(struct 
drm_i915_gem_object *obj,
         * lists early.
         */
        pages = __i915_gem_object_unset_pages(obj);
+       if (!pages && !i915_gem_object_needs_async_cancel(obj))
+               pages = ERR_PTR(-EINVAL);

(Hmm yeah, this path did not use to handle the existing possible NULL pages here.)

Please put a blurb in the commit message on the high- to medium-level design of the change.

        if (!IS_ERR(pages))
                obj->ops->put_pages(obj, pages);

+ ret = 0;

  unlock:
        mutex_unlock(&obj->mm.lock);
+
+       return ret;
  }

bool i915_sg_trim(struct sg_table *orig_st)

diff --git a/drivers/gpu/drm/i915/i915_gem_object.h 
b/drivers/gpu/drm/i915/i915_gem_object.h
index a6dd7c46de0d..49ce797173b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -56,6 +56,7 @@ struct drm_i915_gem_object_ops {
  #define I915_GEM_OBJECT_HAS_STRUCT_PAGE       BIT(0)
  #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
  #define I915_GEM_OBJECT_IS_PROXY      BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL   BIT(3)

/* Interface between the GEM object and its backing storage.

         * get_pages() is called once prior to the use of the associated set
@@ -386,6 +387,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object 
*obj)
        return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
  }

+static inline bool

+i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
+{
+       return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
+}
+
  static inline bool
  i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
  {
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2c9b284036d1..ab5ae426e27b 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -50,79 +50,84 @@ struct i915_mmu_notifier {
        struct hlist_node node;
        struct mmu_notifier mn;
        struct rb_root_cached objects;
-       struct workqueue_struct *wq;
+       struct i915_mm_struct *mm;
  };

struct i915_mmu_object {

        struct i915_mmu_notifier *mn;
        struct drm_i915_gem_object *obj;
        struct interval_tree_node it;
-       struct list_head link;
-       struct work_struct work;
-       bool attached;
  };

-static void cancel_userptr(struct work_struct *work)

-{
-       struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
-       struct drm_i915_gem_object *obj = mo->obj;
-       struct work_struct *active;
-
-       /* Cancel any active worker and force us to re-evaluate gup */
-       mutex_lock(&obj->mm.lock);
-       active = fetch_and_zero(&obj->userptr.work);
-       mutex_unlock(&obj->mm.lock);
-       if (active)
-               goto out;
-
-       i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
-
-       mutex_lock(&obj->base.dev->struct_mutex);
-
-       /* We are inside a kthread context and can't be interrupted */
-       if (i915_gem_object_unbind(obj) == 0)
-               __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-       WARN_ONCE(i915_gem_object_has_pages(obj),
-                 "Failed to release pages: bind_count=%d, pages_pin_count=%d, 
pin_global=%d\n",
-                 obj->bind_count,
-                 atomic_read(&obj->mm.pages_pin_count),
-                 obj->pin_global);
-
-       mutex_unlock(&obj->base.dev->struct_mutex);
-
-out:
-       i915_gem_object_put(obj);
-}
-
  static void add_object(struct i915_mmu_object *mo)
  {
-       if (mo->attached)
+       if (!RB_EMPTY_NODE(&mo->it.rb))
                return;

interval_tree_insert(&mo->it, &mo->mn->objects);

-       mo->attached = true;
  }

static void del_object(struct i915_mmu_object *mo)

  {
-       if (!mo->attached)
+       if (RB_EMPTY_NODE(&mo->it.rb))
                return;

interval_tree_remove(&mo->it, &mo->mn->objects);

-       mo->attached = false;
+       RB_CLEAR_NODE(&mo->it.rb);
+}
+
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+       struct i915_mmu_object *mo = obj->userptr.mmu_object;
+
+       /*
+        * During mm_invalidate_range we need to cancel any userptr that
+        * overlaps the range being invalidated. Doing so requires the
+        * struct_mutex, and that risks recursion. In order to cause
+        * recursion, the user must alias the userptr address space with
+        * a GTT mmapping (possible with a MAP_FIXED) - then when we have
+        * to invalidate that mmaping, mm_invalidate_range is called with
+        * the userptr address *and* the struct_mutex held.  To prevent that
+        * we set a flag under the i915_mmu_notifier spinlock to indicate
+        * whether this object is valid.
+        */
+       if (!mo)
+               return;
+
+       spin_lock(&mo->mn->lock);
+       if (value)
+               add_object(mo);
+       else
+               del_object(mo);
+       spin_unlock(&mo->mn->lock);
+}
+
+static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
+{
+       switch (mutex_trylock_recursive(m)) {
+       default:
+       case MUTEX_TRYLOCK_FAILED:
+               mutex_lock(m);
+       case MUTEX_TRYLOCK_SUCCESS:
+               return m;
+
+       case MUTEX_TRYLOCK_RECURSIVE:
+               return ERR_PTR(-EEXIST);

This suggests we will break the API requirement to always invalidate in the blockable case.

+       }
  }

static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,

-                                                      struct mm_struct *mm,
-                                                      unsigned long start,
-                                                      unsigned long end,
-                                                      bool blockable)
+                                                     struct mm_struct *mm,
+                                                     unsigned long start,
+                                                     unsigned long end,
+                                                     bool blockable)
  {
        struct i915_mmu_notifier *mn =
                container_of(_mn, struct i915_mmu_notifier, mn);
-       struct i915_mmu_object *mo;
        struct interval_tree_node *it;
-       LIST_HEAD(cancelled);
+       struct mutex *unlock = NULL;
+       int ret = 0;

if (RB_EMPTY_ROOT(&mn->objects.rb_root))

                return 0;
@@ -133,11 +138,15 @@ static int 
i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
        spin_lock(&mn->lock);
        it = interval_tree_iter_first(&mn->objects, start, end);
        while (it) {
+               struct drm_i915_gem_object *obj;
+
                if (!blockable) {
-                       spin_unlock(&mn->lock);
-                       return -EAGAIN;
+                       ret = -EAGAIN;
+                       break;
                }
-               /* The mmu_object is released late when destroying the
+
+               /*
+                * The mmu_object is released late when destroying the
                 * GEM object so it is entirely possible to gain a
                 * reference on an object in the process of being freed
                 * since our serialisation is via the spinlock and not
@@ -146,21 +155,33 @@ static int 
i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
                 * use-after-free we only acquire a reference on the
                 * object if it is not in the process of being destroyed.
                 */
-               mo = container_of(it, struct i915_mmu_object, it);
-               if (kref_get_unless_zero(&mo->obj->base.refcount))
-                       queue_work(mn->wq, &mo->work);
-
-               list_add(&mo->link, &cancelled);
-               it = interval_tree_iter_next(it, start, end);
+               obj = container_of(it, struct i915_mmu_object, it)->obj;
+               if (!kref_get_unless_zero(&obj->base.refcount)) {
+                       it = interval_tree_iter_next(it, start, end);
+                       continue;
+               }
+               spin_unlock(&mn->lock);
+
+               if (!unlock)
+                       unlock = 
__i915_mutex_lock_recursive(&mn->mm->i915->drm.struct_mutex);

Hmm... but we proceed regardless of the trylock result and don't even bother looking at it. I don't get it. I'll stop here since it doesn't make sense to me at this moment.


Regards,

Tvrtko

+               ret = i915_gem_object_unbind(obj);
+               if (ret == 0)
+                       ret = __i915_gem_object_put_pages(obj, 
I915_MM_SHRINKER);
+               i915_gem_object_put(obj);
+               if (ret)
+                       goto unlock;
+
+               spin_lock(&mn->lock);
+               it = interval_tree_iter_first(&mn->objects, start, end);
        }
-       list_for_each_entry(mo, &cancelled, link)
-               del_object(mo);
        spin_unlock(&mn->lock);

- if (!list_empty(&cancelled))

-               flush_workqueue(mn->wq);
+unlock:
+       if (!IS_ERR_OR_NULL(unlock))
+               mutex_unlock(unlock);
+
+       return ret;

- return 0;

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {

@@ -168,7 +189,7 @@ static const struct mmu_notifier_ops 
i915_gem_userptr_notifier = {
  };

static struct i915_mmu_notifier *

-i915_mmu_notifier_create(struct mm_struct *mm)
+i915_mmu_notifier_create(struct i915_mm_struct *mm)
  {
        struct i915_mmu_notifier *mn;

@@ -179,13 +200,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)

        spin_lock_init(&mn->lock);
        mn->mn.ops = &i915_gem_userptr_notifier;
        mn->objects = RB_ROOT_CACHED;
-       mn->wq = alloc_workqueue("i915-userptr-release",
-                                WQ_UNBOUND | WQ_MEM_RECLAIM,
-                                0);
-       if (mn->wq == NULL) {
-               kfree(mn);
-               return ERR_PTR(-ENOMEM);
-       }
+       mn->mm = mm;

return mn;

  }
@@ -195,16 +210,14 @@ i915_gem_userptr_release__mmu_notifier(struct 
drm_i915_gem_object *obj)
  {
        struct i915_mmu_object *mo;

- mo = obj->userptr.mmu_object;

-       if (mo == NULL)
+       mo = fetch_and_zero(&obj->userptr.mmu_object);
+       if (!mo)
                return;

spin_lock(&mo->mn->lock);

        del_object(mo);
        spin_unlock(&mo->mn->lock);
        kfree(mo);
-
-       obj->userptr.mmu_object = NULL;
  }

static struct i915_mmu_notifier *

@@ -217,7 +230,7 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
        if (mn)
                return mn;

- mn = i915_mmu_notifier_create(mm->mm);

+       mn = i915_mmu_notifier_create(mm);
        if (IS_ERR(mn))
                err = PTR_ERR(mn);

@@ -240,10 +253,8 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)

        mutex_unlock(&mm->i915->mm_lock);
        up_write(&mm->mm->mmap_sem);

- if (mn && !IS_ERR(mn)) {

-               destroy_workqueue(mn->wq);
+       if (mn && !IS_ERR(mn))
                kfree(mn);
-       }

return err ? ERR_PTR(err) : mm->mn;

  }
@@ -266,14 +277,14 @@ i915_gem_userptr_init__mmu_notifier(struct 
drm_i915_gem_object *obj,
                return PTR_ERR(mn);

mo = kzalloc(sizeof(*mo), GFP_KERNEL);

-       if (mo == NULL)
+       if (!mo)
                return -ENOMEM;

mo->mn = mn;

        mo->obj = obj;
        mo->it.start = obj->userptr.ptr;
        mo->it.last = obj->userptr.ptr + obj->base.size - 1;
-       INIT_WORK(&mo->work, cancel_userptr);
+       RB_CLEAR_NODE(&mo->it.rb);

obj->userptr.mmu_object = mo;

        return 0;
@@ -287,12 +298,16 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                return;

mmu_notifier_unregister(&mn->mn, mm);

-       destroy_workqueue(mn->wq);
        kfree(mn);
  }

#else+static void

+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+}
+
  static void
  i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
  {
@@ -461,42 +476,6 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object 
*obj,
        return st;
  }

-static int

-__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
-                             bool value)
-{
-       int ret = 0;
-
-       /* During mm_invalidate_range we need to cancel any userptr that
-        * overlaps the range being invalidated. Doing so requires the
-        * struct_mutex, and that risks recursion. In order to cause
-        * recursion, the user must alias the userptr address space with
-        * a GTT mmapping (possible with a MAP_FIXED) - then when we have
-        * to invalidate that mmaping, mm_invalidate_range is called with
-        * the userptr address *and* the struct_mutex held.  To prevent that
-        * we set a flag under the i915_mmu_notifier spinlock to indicate
-        * whether this object is valid.
-        */
-#if defined(CONFIG_MMU_NOTIFIER)
-       if (obj->userptr.mmu_object == NULL)
-               return 0;
-
-       spin_lock(&obj->userptr.mmu_object->mn->lock);
-       /* In order to serialise get_pages with an outstanding
-        * cancel_userptr, we must drop the struct_mutex and try again.
-        */
-       if (!value)
-               del_object(obj->userptr.mmu_object);
-       else if (!work_pending(&obj->userptr.mmu_object->work))
-               add_object(obj->userptr.mmu_object);
-       else
-               ret = -EAGAIN;
-       spin_unlock(&obj->userptr.mmu_object->mn->lock);
-#endif
-
-       return ret;
-}
-
  static void
  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
  {
@@ -682,8 +661,11 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
        struct sgt_iter sgt_iter;
        struct page *page;

- BUG_ON(obj->userptr.work != NULL);

+       /* Cancel any inflight work and force them to restart their gup */
+       obj->userptr.work = NULL;
        __i915_gem_userptr_set_active(obj, false);
+       if (!pages)
+               return;

if (obj->mm.madv != I915_MADV_WILLNEED)

                obj->mm.dirty = false;
@@ -721,7 +703,8 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object 
*obj)

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {

        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
-                I915_GEM_OBJECT_IS_SHRINKABLE,
+                I915_GEM_OBJECT_IS_SHRINKABLE |
+                I915_GEM_OBJECT_ASYNC_CANCEL,
        .get_pages = i915_gem_userptr_get_pages,
        .put_pages = i915_gem_userptr_put_pages,
        .dmabuf_export = i915_gem_userptr_dmabuf_export,

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 04/25] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start

Reply via email to