Re: [Intel-gfx] [PATCH 02/16] drm/i915: Convert non-blocking waits for requests over to using RCU

2016-08-03 Thread Joonas Lahtinen
On ke, 2016-08-03 at 14:36 +0100, Chris Wilson wrote:
> On Wed, Aug 03, 2016 at 04:23:16PM +0300, Joonas Lahtinen wrote:
> > 
> > On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> > > 
> > >  /**
> > > @@ -1647,6 +1629,15 @@ int i915_gem_fault(struct vm_area_struct *vma, 
> > > struct vm_fault *vmf)
> > >   int ret = 0;
> > >   bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
> > >  
> > > + /* Try to flush the object off the GPU first without holding the lock.
> > > +  * Upon acquiring the lock, we will perform our sanity checks and then
> > > +  * repeat the flush holding the lock in the normal manner to catch cases
> > > +  * where we are gazumped.
> > > +  */
> > > + ret = __unsafe_wait_rendering(obj, NULL, !write);
> > > + if (ret)
> > > + goto err;
> > > +
> > Why do you lift this call super early, tracing will be affected at
> > least.
> I was moving it out of the rpm_get, since we don't need to be inside that
> runtime barrier. (That rpm get is very interesting btw, but that's for
> later!)
> 
> The trace can be moved as well.

With the trace moved and the label simplified,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 02/16] drm/i915: Convert non-blocking waits for requests over to using RCU

2016-08-03 Thread Chris Wilson
On Wed, Aug 03, 2016 at 04:23:16PM +0300, Joonas Lahtinen wrote:
> On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> >  /**
> > @@ -1647,6 +1629,15 @@ int i915_gem_fault(struct vm_area_struct *vma, 
> > struct vm_fault *vmf)
> >     int ret = 0;
> >     bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
> >  
> > +   /* Try to flush the object off the GPU first without holding the lock.
> > +    * Upon acquiring the lock, we will perform our sanity checks and then
> > +    * repeat the flush holding the lock in the normal manner to catch cases
> > +    * where we are gazumped.
> > +    */
> > +   ret = __unsafe_wait_rendering(obj, NULL, !write);
> > +   if (ret)
> > +   goto err;
> > +
> 
> Why do you lift this call super early, tracing will be affected at
> least.

I was moving it out of the rpm_get, since we don't need to be inside that
runtime barrier. (That rpm get is very interesting btw, but that's for
later!)

The trace can be moved as well.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 02/16] drm/i915: Convert non-blocking waits for requests over to using RCU

2016-08-03 Thread Joonas Lahtinen
On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote:
> - if (!obj) {
> - ret = -ENOENT;
> - goto unlock;
> - }
> + if (!obj)
> + return -ENOENT;
>  
>   /* Try to flush the object off the GPU without holding the lock.
>    * We will repeat the flush holding the lock in the normal manner
>    * to catch cases where we are gazumped.
>    */
> - ret = i915_gem_object_wait_rendering__nonblocking(obj,
> -   to_rps_client(file),
> -   !write_domain);
> + ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
>   if (ret)
> - goto unref;
> + goto out_unlocked;
> +
> + ret = i915_mutex_lock_interruptible(dev);
> + if (ret)
> + goto out_unlocked;
>  
>   if (read_domains & I915_GEM_DOMAIN_GTT)
>   ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
> @@ -1501,11 +1481,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, 
> void *data,
>   if (write_domain != 0)
>   intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
>  
> -unref:
>   i915_gem_object_put(obj);
> -unlock:
>   mutex_unlock(&dev->struct_mutex);
>   return ret;
> +
> +out_unlocked:

This is the sole label, you could call 'err' too.

> + i915_gem_object_put_unlocked(obj);
> + return ret;
>  }
>  
>  /**
> @@ -1647,6 +1629,15 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
> vm_fault *vmf)
>   int ret = 0;
>   bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
>  
> + /* Try to flush the object off the GPU first without holding the lock.
> +  * Upon acquiring the lock, we will perform our sanity checks and then
> +  * repeat the flush holding the lock in the normal manner to catch cases
> +  * where we are gazumped.
> +  */
> + ret = __unsafe_wait_rendering(obj, NULL, !write);
> + if (ret)
> + goto err;
> +

Why do you lift this call super early, tracing will be affected at
least.

>   intel_runtime_pm_get(dev_priv);
>  
>   /* We don't use vmf->pgoff since that has the fake offset */
> @@ -1655,23 +1646,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
> vm_fault *vmf)
>  
>   ret = i915_mutex_lock_interruptible(dev);
>   if (ret)
> - goto out;
> + goto err_rpm;
>  
>   trace_i915_gem_object_fault(obj, page_offset, true, write);
>  
> - /* Try to flush the object off the GPU first without holding the lock.
> -  * Upon reacquiring the lock, we will perform our sanity checks and then
> -  * repeat the flush holding the lock in the normal manner to catch cases
> -  * where we are gazumped.
> -  */
> - ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
> - if (ret)
> - goto unlock;
> -
>   /* Access to snoopable pages through the GTT is incoherent. */
>   if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
>   ret = -EFAULT;
> - goto unlock;
> + goto err_unlock;
>   }
>  
>   /* Use a partial view if the object is bigger than the aperture. */
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 02/16] drm/i915: Convert non-blocking waits for requests over to using RCU

2016-08-01 Thread Chris Wilson
We can completely avoid taking the struct_mutex around the non-blocking
waits by switching over to the RCU request management (trading the mutex
for a RCU read lock and some complex atomic operations). The improvement
is that we gain further contention reduction, and overall the code
become simpler due to the reduced mutex dancing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 112 +---
 1 file changed, 47 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d8d3e728230d..f164ad482bdc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -346,24 +346,20 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object 
*obj,
return 0;
 }
 
-/* A nonblocking variant of the above wait. This is a highly dangerous routine
- * as the object state may change during this call.
+/* A nonblocking variant of the above wait. Must be called prior to
+ * acquiring the mutex for the object, as the object state may change
+ * during this call. A reference must be held by the caller for the object.
  */
 static __must_check int
-i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
-   struct intel_rps_client *rps,
-   bool readonly)
+__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
+   struct intel_rps_client *rps,
+   bool readonly)
 {
-   struct drm_device *dev = obj->base.dev;
-   struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
struct i915_gem_active *active;
unsigned long active_mask;
-   int ret, i, n = 0;
-
-   lockdep_assert_held(&dev->struct_mutex);
-   GEM_BUG_ON(!to_i915(dev)->mm.interruptible);
+   int idx;
 
-   active_mask = i915_gem_object_get_active(obj);
+   active_mask = __I915_BO_ACTIVE(obj);
if (!active_mask)
return 0;
 
@@ -374,25 +370,16 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
active = &obj->last_write;
}
 
-   for_each_active(active_mask, i) {
-   struct drm_i915_gem_request *req;
+   for_each_active(active_mask, idx) {
+   int ret;
 
-   req = i915_gem_active_get(&obj->last_read[i],
- &obj->base.dev->struct_mutex);
-   if (req)
-   requests[n++] = req;
+   ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
+   true, NULL, rps);
+   if (ret)
+   return ret;
}
 
-   mutex_unlock(&dev->struct_mutex);
-   ret = 0;
-   for (i = 0; ret == 0 && i < n; i++)
-   ret = i915_wait_request(requests[i], true, NULL, rps);
-   mutex_lock(&dev->struct_mutex);
-
-   for (i = 0; i < n; i++)
-   i915_gem_request_put(requests[i]);
-
-   return ret;
+   return 0;
 }
 
 static struct intel_rps_client *to_rps_client(struct drm_file *file)
@@ -1461,10 +1448,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
int ret;
 
/* Only handle setting domains to types used by the CPU. */
-   if (write_domain & I915_GEM_GPU_DOMAINS)
-   return -EINVAL;
-
-   if (read_domains & I915_GEM_GPU_DOMAINS)
+   if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
return -EINVAL;
 
/* Having something in the write domain implies it's in the read
@@ -1473,25 +1457,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
if (write_domain != 0 && read_domains != write_domain)
return -EINVAL;
 
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
-
obj = i915_gem_object_lookup(file, args->handle);
-   if (!obj) {
-   ret = -ENOENT;
-   goto unlock;
-   }
+   if (!obj)
+   return -ENOENT;
 
/* Try to flush the object off the GPU without holding the lock.
 * We will repeat the flush holding the lock in the normal manner
 * to catch cases where we are gazumped.
 */
-   ret = i915_gem_object_wait_rendering__nonblocking(obj,
- to_rps_client(file),
- !write_domain);
+   ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
if (ret)
-   goto unref;
+   goto out_unlocked;
+
+   ret = i915_mutex_lock_interruptible(dev);
+   if (ret)
+   goto out_unlocked;
 
if (read_domains & I915_GEM_DOMAIN_GTT)
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
@@ -1501,11 +1481,13 @@