Re: [Intel-gfx] [PATCH 02/16] drm/i915: Convert non-blocking waits for requests over to using RCU
On ke, 2016-08-03 at 14:36 +0100, Chris Wilson wrote: > On Wed, Aug 03, 2016 at 04:23:16PM +0300, Joonas Lahtinen wrote: > > > > On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote: > > > > > > /** > > > @@ -1647,6 +1629,15 @@ int i915_gem_fault(struct vm_area_struct *vma, > > > struct vm_fault *vmf) > > > int ret = 0; > > > bool write = !!(vmf->flags & FAULT_FLAG_WRITE); > > > > > > + /* Try to flush the object off the GPU first without holding the lock. > > > + * Upon acquiring the lock, we will perform our sanity checks and then > > > + * repeat the flush holding the lock in the normal manner to catch cases > > > + * where we are gazumped. > > > + */ > > > + ret = __unsafe_wait_rendering(obj, NULL, !write); > > > + if (ret) > > > + goto err; > > > + > > Why do you lift this call super early, tracing will be affected at > > least. > I was moving it out of the rpm_get, since we don't need to be inside that > runtime barrier. (That rpm get is very interesting btw, but that's for > later!) > > The trace can be moved as well. With the trace moved and the label simplified, Reviewed-by: Joonas Lahtinen

Regards, Joonas -- Joonas Lahtinen Open Source Technology Center Intel Corporation ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 02/16] drm/i915: Convert non-blocking waits for requests over to using RCU
On Wed, Aug 03, 2016 at 04:23:16PM +0300, Joonas Lahtinen wrote: > On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote: > > /** > > @@ -1647,6 +1629,15 @@ int i915_gem_fault(struct vm_area_struct *vma, > > struct vm_fault *vmf) > > int ret = 0; > > bool write = !!(vmf->flags & FAULT_FLAG_WRITE); > > > > + /* Try to flush the object off the GPU first without holding the lock. > > + * Upon acquiring the lock, we will perform our sanity checks and then > > + * repeat the flush holding the lock in the normal manner to catch cases > > + * where we are gazumped. > > + */ > > + ret = __unsafe_wait_rendering(obj, NULL, !write); > > + if (ret) > > + goto err; > > + > > Why do you lift this call super early, tracing will be affected at > least. I was moving it out of the rpm_get, since we don't need to be inside that runtime barrier. (That rpm get is very interesting btw, but that's for later!) The trace can be moved as well. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 02/16] drm/i915: Convert non-blocking waits for requests over to using RCU
On ma, 2016-08-01 at 19:22 +0100, Chris Wilson wrote: > - if (!obj) { > - ret = -ENOENT; > - goto unlock; > - } > + if (!obj) > + return -ENOENT; > > /* Try to flush the object off the GPU without holding the lock. > * We will repeat the flush holding the lock in the normal manner > * to catch cases where we are gazumped. > */ > - ret = i915_gem_object_wait_rendering__nonblocking(obj, > - to_rps_client(file), > - !write_domain); > + ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); > if (ret) > - goto unref; > + goto out_unlocked; > + > + ret = i915_mutex_lock_interruptible(dev); > + if (ret) > + goto out_unlocked; > > if (read_domains & I915_GEM_DOMAIN_GTT) > ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); > @@ -1501,11 +1481,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, > void *data, > if (write_domain != 0) > intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); > > -unref: > i915_gem_object_put(obj); > -unlock: > mutex_unlock(>struct_mutex); > return ret; > + > +out_unlocked: This is the sole label, you could call 'err' too. > + i915_gem_object_put_unlocked(obj); > + return ret; > } > > /** > @@ -1647,6 +1629,15 @@ int i915_gem_fault(struct vm_area_struct *vma, struct > vm_fault *vmf) > int ret = 0; > bool write = !!(vmf->flags & FAULT_FLAG_WRITE); > > + /* Try to flush the object off the GPU first without holding the lock. > + * Upon acquiring the lock, we will perform our sanity checks and then > + * repeat the flush holding the lock in the normal manner to catch cases > + * where we are gazumped. > + */ > + ret = __unsafe_wait_rendering(obj, NULL, !write); > + if (ret) > + goto err; > + Why do you lift this call super early, tracing will be affected at least. 
> intel_runtime_pm_get(dev_priv); > > /* We don't use vmf->pgoff since that has the fake offset */ > @@ -1655,23 +1646,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct > vm_fault *vmf) > > ret = i915_mutex_lock_interruptible(dev); > if (ret) > - goto out; > + goto err_rpm; > > trace_i915_gem_object_fault(obj, page_offset, true, write); > > - /* Try to flush the object off the GPU first without holding the lock. > - * Upon reacquiring the lock, we will perform our sanity checks and then > - * repeat the flush holding the lock in the normal manner to catch cases > - * where we are gazumped. > - */ > - ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); > - if (ret) > - goto unlock; > - > /* Access to snoopable pages through the GTT is incoherent. */ > if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { > ret = -EFAULT; > - goto unlock; > + goto err_unlock; > } > > /* Use a partial view if the object is bigger than the aperture. */ -- Joonas Lahtinen Open Source Technology Center Intel Corporation ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 02/16] drm/i915: Convert non-blocking waits for requests over to using RCU
We can completely avoid taking the struct_mutex around the non-blocking waits by switching over to the RCU request management (trading the mutex for a RCU read lock and some complex atomic operations). The improvement is that we gain further contention reduction, and overall the code become simpler due to the reduced mutex dancing. Signed-off-by: Chris Wilson
--- drivers/gpu/drm/i915/i915_gem.c | 112 +--- 1 file changed, 47 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d8d3e728230d..f164ad482bdc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -346,24 +346,20 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, return 0; } -/* A nonblocking variant of the above wait. This is a highly dangerous routine - * as the object state may change during this call. +/* A nonblocking variant of the above wait. Must be called prior to + * acquiring the mutex for the object, as the object state may change + * during this call. A reference must be held by the caller for the object. 
*/ static __must_check int -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, - struct intel_rps_client *rps, - bool readonly) +__unsafe_wait_rendering(struct drm_i915_gem_object *obj, + struct intel_rps_client *rps, + bool readonly) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; struct i915_gem_active *active; unsigned long active_mask; - int ret, i, n = 0; - - lockdep_assert_held(&dev->struct_mutex); - GEM_BUG_ON(!to_i915(dev)->mm.interruptible); + int idx; - active_mask = i915_gem_object_get_active(obj); + active_mask = __I915_BO_ACTIVE(obj); if (!active_mask) return 0; @@ -374,25 +370,16 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, active = &obj->last_write; } - for_each_active(active_mask, i) { - struct drm_i915_gem_request *req; + for_each_active(active_mask, idx) { + int ret; - req = i915_gem_active_get(&active[i], - &obj->base.dev->struct_mutex); - if (req) - requests[n++] = req; + ret = i915_gem_active_wait_unlocked(&active[idx], + true, NULL, rps); + if (ret) + return ret; } - mutex_unlock(&dev->struct_mutex); - ret = 0; - for (i = 0; ret == 0 && i < n; i++) - ret = i915_wait_request(requests[i], true, NULL, rps); - mutex_lock(&dev->struct_mutex); - - for (i = 0; i < n; i++) - i915_gem_request_put(requests[i]); - - return ret; + return 0; } static struct intel_rps_client *to_rps_client(struct drm_file *file) @@ -1461,10 +1448,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, int ret; /* Only handle setting domains to types used by the CPU. 
*/ - if (write_domain & I915_GEM_GPU_DOMAINS) - return -EINVAL; - - if (read_domains & I915_GEM_GPU_DOMAINS) + if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) return -EINVAL; /* Having something in the write domain implies it's in the read @@ -1473,25 +1457,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (write_domain != 0 && read_domains != write_domain) return -EINVAL; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + if (!obj) + return -ENOENT; /* Try to flush the object off the GPU without holding the lock. * We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, - to_rps_client(file), - !write_domain); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); if (ret) - goto unref; + goto out_unlocked; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto out_unlocked; if (read_domains & I915_GEM_DOMAIN_GTT) ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); @@ -1501,11 +1481,13 @@