[Intel-gfx] [PATCH 11/16] drm/i915: Remove (struct_mutex) locking for busy-ioctl

2016-08-01 Thread Chris Wilson
By applying the same logic as for wait-ioctl, we can query whether a
request has completed without holding struct_mutex. The biggest impact
system-wide is removing the flush_active and the contention that causes.

Testcase: igt/gem_busy
Signed-off-by: Chris Wilson 
Cc: Akash Goel 
---
 drivers/gpu/drm/i915/i915_gem.c | 110 +---
 1 file changed, 80 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 43069b05bdd2..f2f70f5ff9f4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3721,49 +3721,99 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
+static __always_inline unsigned
+__busy_read_flag(const struct drm_i915_gem_request *request)
+{
+   return 0x1 << request->engine->exec_id;
+}
+
+static __always_inline unsigned int
+__busy_write_flag(const struct drm_i915_gem_request *request)
+{
+   return request->engine->exec_id;
+}
+
+static __always_inline unsigned
+__busy_flag(const struct i915_gem_active *active,
+   unsigned int (*flag)(const struct drm_i915_gem_request *))
+{
+   struct drm_i915_gem_request *request;
+
+   request = rcu_dereference(active->request);
+   if (!request || i915_gem_request_completed(request))
+   return 0;
+
+   return flag(request);
+}
+
+static inline unsigned
+busy_read_flag(const struct i915_gem_active *active)
+{
+   return __busy_flag(active, __busy_read_flag);
+}
+
+static inline unsigned
+busy_write_flag(const struct i915_gem_active *active)
+{
+   return __busy_flag(active, __busy_write_flag);
+}
+
 int
 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
struct drm_i915_gem_busy *args = data;
struct drm_i915_gem_object *obj;
-   int ret;
-
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
+   unsigned long active;
 
obj = i915_gem_object_lookup(file, args->handle);
-   if (!obj) {
-   ret = -ENOENT;
-   goto unlock;
-   }
+   if (!obj)
+   return -ENOENT;
 
-   /* Count all active objects as busy, even if they are currently not used
-* by the gpu. Users of this interface expect objects to eventually
-* become non-busy without any further actions.
-*/
args->busy = 0;
-   if (i915_gem_object_is_active(obj)) {
-   struct drm_i915_gem_request *req;
-   int i;
+   active = __I915_BO_ACTIVE(obj);
+   if (active) {
+   int idx;
 
-   for (i = 0; i < I915_NUM_ENGINES; i++) {
-   req = i915_gem_active_peek(&obj->last_read[i],
-  &obj->base.dev->struct_mutex);
-   if (req)
-   args->busy |= 1 << (16 + req->engine->exec_id);
-   }
-   req = i915_gem_active_peek(&obj->last_write,
-  &obj->base.dev->struct_mutex);
-   if (req)
-   args->busy |= req->engine->exec_id;
+   /* Yes, the lookups are intentionally racy.
+*
+* Even though we guard the pointer lookup by RCU, that only
+* guarantees that the pointer and its contents remain
+* dereferencable and does *not* mean that the request we
+* have is the same as the one being tracked by the object.
+*
+* Consider that we lookup the request just as it is being
+* retired and free. We take a local copy of the pointer,
+* but before we add its engine into the busy set, the other
+* thread reallocates it and assigns it to a task on another
+* engine with a fresh and incomplete seqno.
+*
+* So after we lookup the engine's id, we double check that
+* the active request is the same and only then do we add it
+* into the busy set.
+*/
+   rcu_read_lock();
+
+   for_each_active(active, idx)
+   args->busy |= busy_read_flag(&obj->last_read[idx]);
+
+   /* For ABI sanity, we only care that the write engine is in
+* the set of read engines. This is ensured by the ordering
+* of setting last_read/last_write in i915_vma_move_to_active,
+* and then in reverse in retire.
+*
+* We don't care that the set of active read/write engines
+* may change during construction of the result, as it is
+* equally liable to change before userspace can inspect
+* the result.
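
For reference, a minimal userspace sketch of how the result of this ioctl is
typically decoded, assuming the layout used by the code being removed (one
read flag per engine in the upper 16 bits of args->busy, the last writer's
exec_id in the lower 16 bits). The fd/handle plumbing and the helper name are
illustrative only, not part of this patch; it builds against libdrm:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Hypothetical helper: query and print which engines still reference the
 * object. Returns 0 on success, -errno on ioctl failure.
 */
static int report_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_busy busy = { .handle = handle };

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
		return -errno;

	if (!busy.busy) {
		printf("object %u is idle\n", handle);
		return 0;
	}

	/* Lower 16 bits: exec_id of the engine with the outstanding write,
	 * if any. Upper 16 bits: one flag per engine with an outstanding
	 * read.
	 */
	printf("object %u: write engine id %u, read engine mask %#x\n",
	       handle, busy.busy & 0xffff, busy.busy >> 16);
	return 0;
}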

Re: [Intel-gfx] [PATCH 11/16] drm/i915: Remove (struct_mutex) locking for busy-ioctl

2016-08-04 Thread Joonas Lahtinen
On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> +static __always_inline unsigned
> +__busy_read_flag(const struct drm_i915_gem_request *request)
> +{
> + return 0x1 << request->engine->exec_id;

Duh, this really is our ABI? No BIT(NUM_ENGINES) or something sane?

Add a comment about this situation.

> + /* Yes, the lookups are intentionally racy.
> +  *
> +  * Even though we guard the pointer lookup by RCU, that only
> +  * guarantees that the pointer and its contents remain
> +  * dereferencable and does *not* mean that the request we
> +  * have is the same as the one being tracked by the object.
> +  *
> +  * Consider that we lookup the request just as it is being
> +  * retired and free. We take a local copy of the pointer,

s/free/freed/

> +  * but before we add its engine into the busy set, the other
> +  * thread reallocates it and assigns it to a task on another
> +  * engine with a fresh and incomplete seqno.
> +  *
> +  * So after we lookup the engine's id, we double check that

This double check is nowhere to be seen, time to update this comment
too?

The code itself is quite OK, but the comments mislead my understanding
of the code again.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation


Re: [Intel-gfx] [PATCH 11/16] drm/i915: Remove (struct_mutex) locking for busy-ioctl

2016-08-04 Thread Chris Wilson
On Thu, Aug 04, 2016 at 01:25:08PM +0300, Joonas Lahtinen wrote:
> On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> > +static __always_inline unsigned
> > +__busy_read_flag(const struct drm_i915_gem_request *request)
> > +{
> > +   return 0x1 << request->engine->exec_id;
> 
> Duh, this really is our ABI? No BIT(NUM_ENGINES) or something sane?

BIT(NUM_ENGINES) as uabi, you call that sane :)

> Add a comment about this situation.

Like BUILD_BUG_ON(NUM_ENGINES > 16).
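
A minimal sketch of what such a compile-time guard and comment could look
like; the placement and exact wording that eventually land may differ. It
assumes the uABI layout of the code being removed, i.e. one read flag per
engine in the upper 16 bits of args->busy (1 << (16 + exec_id)) and the last
writer's exec_id in the lower 16 bits:

static __always_inline unsigned int
__busy_read_flag(const struct drm_i915_gem_request *request)
{
	/* The busy-ioctl uABI packs per-engine read flags into the upper
	 * 16 bits and the writer's exec_id into the lower 16 bits, so the
	 * scheme only holds while every exec_id fits into a 16-bit flag.
	 * Catch any future growth at compile time.
	 */
	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
	return 0x10000 << request->engine->exec_id;
}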

> > +   /* Yes, the lookups are intentionally racy.
> > +    *
> > +    * Even though we guard the pointer lookup by RCU, that only
> > +    * guarantees that the pointer and its contents remain
> > +    * dereferencable and does *not* mean that the request we
> > +    * have is the same as the one being tracked by the object.
> > +    *
> > +    * Consider that we lookup the request just as it is being
> > +    * retired and free. We take a local copy of the pointer,
> 
> s/free/freed/
> 
> > +    * but before we add its engine into the busy set, the other
> > +    * thread reallocates it and assigns it to a task on another
> > +    * engine with a fresh and incomplete seqno.
> > +    *
> > +    * So after we lookup the engine's id, we double check that
> 
> This double check is nowhere to be seen, time to update this comment
> too?

Actually, I snuck it back in. I took it out thinking that there wasn't a
sufficient read-dependency to guarantee the ordering (but have now
reversed my decision there), having re-read my comment about why the
check is required.
-Chris
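
One way the "double check" described in the comment could be realised on top
of the patch's __busy_flag() helper, sketched here for illustration only:
whether this particular re-check gives a strong enough read-dependency is
exactly the ordering question discussed above.

static __always_inline unsigned
__busy_flag(const struct i915_gem_active *active,
	    unsigned int (*flag)(const struct drm_i915_gem_request *))
{
	struct drm_i915_gem_request *request;
	unsigned int busy;

	request = rcu_dereference(active->request);
	if (!request || i915_gem_request_completed(request))
		return 0;

	/* Compute this engine's contribution first... */
	busy = flag(request);

	/* ...then confirm the object is still tracking the very same
	 * request. If it was retired and reused for another engine in the
	 * meantime, discard the stale answer rather than report a bogus
	 * engine.
	 */
	if (request != rcu_access_pointer(active->request))
		busy = 0;

	return busy;
}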

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH 11/16] drm/i915: Remove (struct_mutex) locking for busy-ioctl

2016-08-05 Thread Joonas Lahtinen
On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> By applying the same logic as for wait-ioctl, we can query whether a
> request has completed without holding struct_mutex. The biggest impact
> system-wide is removing the flush_active and the contention that causes.
> 
> Testcase: igt/gem_busy
> Signed-off-by: Chris Wilson 
> Cc: Akash Goel 
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 110 
> +---
>  1 file changed, 80 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 43069b05bdd2..f2f70f5ff9f4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3721,49 +3721,99 @@ i915_gem_object_ggtt_unpin_view(struct 
> drm_i915_gem_object *obj,
>   i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
>  }
>  
> +static __always_inline unsigned
> +__busy_read_flag(const struct drm_i915_gem_request *request)
> +{
> + return 0x1 << request->engine->exec_id;
> +}
> +
> +static __always_inline unsigned int
> +__busy_write_flag(const struct drm_i915_gem_request *request)
> +{
> + return request->engine->exec_id;

Just realized (to my horror) this is not a flag, it's a bare ID, so
better not call the function _flag, but rather _id?

> +}
> +
> +static __always_inline unsigned
> +__busy_flag(const struct i915_gem_active *active,
> + unsigned int (*flag)(const struct drm_i915_gem_request *))
> +{
> + struct drm_i915_gem_request *request;
> +
> + request = rcu_dereference(active->request);
> + if (!request || i915_gem_request_completed(request))
> + return 0;
> +
> + return flag(request);
> +}
> +
> +static inline unsigned
> +busy_read_flag(const struct i915_gem_active *active)
> +{
> + return __busy_flag(active, __busy_read_flag);
> +}
> +
> +static inline unsigned
> +busy_write_flag(const struct i915_gem_active *active)
> +{
> + return __busy_flag(active, __busy_write_flag);
> +}
> +
>  int
>  i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>   struct drm_file *file)
>  {
>   struct drm_i915_gem_busy *args = data;
>   struct drm_i915_gem_object *obj;
> - int ret;
> -
> - ret = i915_mutex_lock_interruptible(dev);
> - if (ret)
> - return ret;
> + unsigned long active;
>  
>   obj = i915_gem_object_lookup(file, args->handle);
> - if (!obj) {
> - ret = -ENOENT;
> - goto unlock;
> - }
> + if (!obj)
> + return -ENOENT;
>  
> - /* Count all active objects as busy, even if they are currently not used
> -  * by the gpu. Users of this interface expect objects to eventually
> -  * become non-busy without any further actions.
> -  */
>   args->busy = 0;
> - if (i915_gem_object_is_active(obj)) {
> - struct drm_i915_gem_request *req;
> - int i;
> + active = __I915_BO_ACTIVE(obj);
> + if (active) {
> + int idx;
>  
> - for (i = 0; i < I915_NUM_ENGINES; i++) {
> - req = i915_gem_active_peek(&obj->last_read[i],
> -    
> &obj->base.dev->struct_mutex);
> - if (req)
> - args->busy |= 1 << (16 + req->engine->exec_id);
> - }
> - req = i915_gem_active_peek(&obj->last_write,
> -    &obj->base.dev->struct_mutex);
> - if (req)
> - args->busy |= req->engine->exec_id;
> + /* Yes, the lookups are intentionally racy.
> +  *
> +  * Even though we guard the pointer lookup by RCU, that only
> +  * guarantees that the pointer and its contents remain
> +  * dereferencable and does *not* mean that the request we
> +  * have is the same as the one being tracked by the object.
> +  *
> +  * Consider that we lookup the request just as it is being
> +  * retired and free. We take a local copy of the pointer,

still s/free/freed/

> +  * but before we add its engine into the busy set, the other
> +  * thread reallocates it and assigns it to a task on another
> +  * engine with a fresh and incomplete seqno.
> +  *
> +  * So after we lookup the engine's id, we double check that
> +  * the active request is the same and only then do we add it
> +  * into the busy set.
> +  */
> + rcu_read_lock();
> +
> + for_each_active(active, idx)
> + args->busy |= busy_read_flag(&obj->last_read[idx]);

So you mean this is double check against __I915_BO_ACTIVE, right?

We're getting there, though. With the above fixed;

Reviewed-by: Joonas Lahtinen 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

Re: [Intel-gfx] [PATCH 11/16] drm/i915: Remove (struct_mutex) locking for busy-ioctl

2016-08-05 Thread Chris Wilson
On Fri, Aug 05, 2016 at 10:05:38AM +0300, Joonas Lahtinen wrote:
> On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> > By applying the same logic as for wait-ioctl, we can query whether a
> > request has completed without holding struct_mutex. The biggest impact
> > system-wide is removing the flush_active and the contention that causes.
> > 
> > Testcase: igt/gem_busy
> > Signed-off-by: Chris Wilson 
> > Cc: Akash Goel 
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 110 
> > +---
> >  1 file changed, 80 insertions(+), 30 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > b/drivers/gpu/drm/i915/i915_gem.c
> > index 43069b05bdd2..f2f70f5ff9f4 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3721,49 +3721,99 @@ i915_gem_object_ggtt_unpin_view(struct 
> > drm_i915_gem_object *obj,
> >     i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
> >  }
> >  
> > +static __always_inline unsigned
> > +__busy_read_flag(const struct drm_i915_gem_request *request)
> > +{
> > +   return 0x1 << request->engine->exec_id;
> > +}
> > +
> > +static __always_inline unsigned int
> > +__busy_write_flag(const struct drm_i915_gem_request *request)
> > +{
> > +   return request->engine->exec_id;
> 
> Just realized (to my horror) this is not a flag, it's a bare ID, so
> better not call the function _flag, but rather _id?

Bah.

__busy_write_id
__busy_read_flag
__busy_set_if_active

busy_set_active_write_id() { __busy_set_if_active(ptr, __busy_write_id); }
busy_set_active_read_flag() { __busy_set_if_active(ptr, __busy_read_flag); }
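
Fleshed out, the proposed renaming could look roughly like the following
sketch. It keeps the semantics of the helpers in the patch and assumes the
uABI layout of the code being removed (read flags in the upper 16 bits,
writer exec_id in the lower 16 bits); the final naming and details may
differ:

static __always_inline unsigned int
__busy_read_flag(const struct drm_i915_gem_request *request)
{
	/* One flag per engine, packed above the 16-bit write id. */
	return 0x10000 << request->engine->exec_id;
}

static __always_inline unsigned int
__busy_write_id(const struct drm_i915_gem_request *request)
{
	/* The last writer is reported as a bare exec_id, not a flag. */
	return request->engine->exec_id;
}

static __always_inline unsigned int
__busy_set_if_active(const struct i915_gem_active *active,
		     unsigned int (*flag)(const struct drm_i915_gem_request *))
{
	struct drm_i915_gem_request *request;

	request = rcu_dereference(active->request);
	if (!request || i915_gem_request_completed(request))
		return 0;

	return flag(request);
}

static inline unsigned int
busy_set_active_read_flag(const struct i915_gem_active *active)
{
	return __busy_set_if_active(active, __busy_read_flag);
}

static inline unsigned int
busy_set_active_write_id(const struct i915_gem_active *active)
{
	return __busy_set_if_active(active, __busy_write_id);
}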

> > +    * but before we add its engine into the busy set, the other
> > +    * thread reallocates it and assigns it to a task on another
> > +    * engine with a fresh and incomplete seqno.
> > +    *
> > +    * So after we lookup the engine's id, we double check that
> > +    * the active request is the same and only then do we add it
> > +    * into the busy set.
> > +    */
> > +   rcu_read_lock();
> > +
> > +   for_each_active(active, idx)
> > +   args->busy |= busy_read_flag(&obj->last_read[idx]);
> 
> So you mean this is double check against __I915_BO_ACTIVE, right?

Yes. The ABI guarantees forward progress but __I915_BO_ACTIVE itself
does not, so we confirm each of the active requests with the hardware.

Will add.
-Chris
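
Condensed, the flow being described looks roughly like the sketch below. The
__I915_BO_ACTIVE() snapshot only selects which slots to inspect; the forward
progress the uABI promises comes from checking each request against the
hardware via i915_gem_request_completed() (inside the helpers) under
rcu_read_lock(). The write-engine and unlock lines are assumptions, since the
tail of the hunk is not visible in the excerpt above:

	args->busy = 0;
	active = __I915_BO_ACTIVE(obj);
	if (active) {
		int idx;

		rcu_read_lock();

		for_each_active(active, idx)
			args->busy |= busy_read_flag(&obj->last_read[idx]);

		args->busy |= busy_write_flag(&obj->last_write);

		rcu_read_unlock();
	}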

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH 11/16] drm/i915: Remove (struct_mutex) locking for busy-ioctl

2016-08-05 Thread Joonas Lahtinen
On pe, 2016-08-05 at 08:34 +0100, Chris Wilson wrote:
> On Fri, Aug 05, 2016 at 10:05:38AM +0300, Joonas Lahtinen wrote:
> > 
> > On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> > > 
> > > By applying the same logic as for wait-ioctl, we can query whether a
> > > request has completed without holding struct_mutex. The biggest impact
> > > system-wide is removing the flush_active and the contention that causes.
> > > 
> > > Testcase: igt/gem_busy
> > > Signed-off-by: Chris Wilson 
> > > Cc: Akash Goel 
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem.c | 110 
> > > +---
> > >  1 file changed, 80 insertions(+), 30 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > > b/drivers/gpu/drm/i915/i915_gem.c
> > > index 43069b05bdd2..f2f70f5ff9f4 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -3721,49 +3721,99 @@ i915_gem_object_ggtt_unpin_view(struct 
> > > drm_i915_gem_object *obj,
> > >   i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
> > >  }
> > >  
> > > +static __always_inline unsigned
> > > +__busy_read_flag(const struct drm_i915_gem_request *request)
> > > +{
> > > + return 0x1 << request->engine->exec_id;
> > > +}
> > > +
> > > +static __always_inline unsigned int
> > > +__busy_write_flag(const struct drm_i915_gem_request *request)
> > > +{
> > > + return request->engine->exec_id;
> > Just realized (to my horror) this is not a flag, it's a bare ID, so
> > better not call the function _flag, but rather _id?
> Bah.
> 
> __busy_write_id
> __busy_read_flag
> __busy_set_if_active
> 
> busy_set_active_write_id() { __busy_set_if_active(ptr, __busy_write_id); }
> busy_set_active_read_flag() { __busy_set_if_active(ptr, __busy_read_flag); }
> 

This would be OK,

Reviewed-by: Joonas Lahtinen 

> > 
> > > 
> > > +  * but before we add its engine into the busy set, the other
> > > +  * thread reallocates it and assigns it to a task on another
> > > +  * engine with a fresh and incomplete seqno.
> > > +  *
> > > +  * So after we lookup the engine's id, we double check that
> > > +  * the active request is the same and only then do we add it
> > > +  * into the busy set.
> > > +  */
> > > + rcu_read_lock();
> > > +
> > > + for_each_active(active, idx)
> > > + args->busy |= busy_read_flag(&obj->last_read[idx]);
> > So you mean this is double check against __I915_BO_ACTIVE, right?
> Yes. The ABI guarantees forward progress but __I915_BO_ACTIVE itself
> does not, so we confirm each of the active requests with the hardware.
> 
> Will add.
> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation