On Wed, Apr 26, 2017 at 11:35:33AM +0100, Tvrtko Ursulin wrote:
> 
> On 26/04/2017 09:06, Chris Wilson wrote:
> >If we are enabling the breadcrumbs signaling prior to submitting the
> >request, we know that we cannot have missed the interrupt and can
> >therefore skip immediately waking the signaler to check.
> >
> >This reduces a significant chunk of the __i915_gem_request_submit()
> >overhead for inter-engine synchronisation, for example in gem_exec_whisper.
> >
> >Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> >---
> > drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
> > drivers/gpu/drm/i915/i915_guc_submission.c | 2 +-
> > drivers/gpu/drm/i915/intel_breadcrumbs.c   | 7 ++++---
> > drivers/gpu/drm/i915/intel_ringbuffer.h    | 3 ++-
> > 4 files changed, 9 insertions(+), 7 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
> >b/drivers/gpu/drm/i915/i915_gem_request.c
> >index 8bbec19e267a..7b9a509f00f3 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_request.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_request.c
> >@@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence 
> >*fence)
> >     if (i915_fence_signaled(fence))
> >             return false;
> >
> >-    intel_engine_enable_signaling(to_request(fence));
> >+    intel_engine_enable_signaling(to_request(fence), true);
> >     return true;
> > }
> >
> >@@ -448,7 +448,7 @@ void __i915_gem_request_submit(struct 
> >drm_i915_gem_request *request)
> >     spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> >     request->global_seqno = seqno;
> >     if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> >-            intel_engine_enable_signaling(request);
> >+            intel_engine_enable_signaling(request, false);
> >     spin_unlock(&request->lock);
> >
> >     engine->emit_breadcrumb(request,
> >diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
> >b/drivers/gpu/drm/i915/i915_guc_submission.c
> >index a4eca6ac449b..7ccb22709efb 100644
> >--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >@@ -649,7 +649,7 @@ static void nested_enable_signaling(struct 
> >drm_i915_gem_request *rq)
> >     trace_dma_fence_enable_signal(&rq->fence);
> >
> >     spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
> >-    intel_engine_enable_signaling(rq);
> >+    intel_engine_enable_signaling(rq, true);
> >     spin_unlock(&rq->lock);
> > }
> >
> >diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
> >b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >index 0839f928bc57..8f52fd5f6102 100644
> >--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >@@ -683,12 +683,13 @@ static int intel_breadcrumbs_signaler(void *arg)
> >     return 0;
> > }
> >
> >-void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> >+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> >+                               bool wakeup)
> > {
> >     struct intel_engine_cs *engine = request->engine;
> >     struct intel_breadcrumbs *b = &engine->breadcrumbs;
> >     struct rb_node *parent, **p;
> >-    bool first, wakeup;
> >+    bool first;
> >     u32 seqno;
> >
> >     /* Note that we may be called from an interrupt handler on another
> >@@ -721,7 +722,7 @@ void intel_engine_enable_signaling(struct 
> >drm_i915_gem_request *request)
> >      * If we are the oldest waiter, enable the irq (after which we
> >      * must double check that the seqno did not complete).
> >      */
> >-    wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);
> >+    wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
> >
> >     /* Now insert ourselves into the retirement ordered list of signals
> >      * on this engine. We track the oldest seqno as that will be the
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
> >b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >index 3f7d5666bcf6..de5b968f508a 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >@@ -672,7 +672,8 @@ bool intel_engine_add_wait(struct intel_engine_cs 
> >*engine,
> >                        struct intel_wait *wait);
> > void intel_engine_remove_wait(struct intel_engine_cs *engine,
> >                           struct intel_wait *wait);
> >-void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
> >+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> >+                               bool wakeup);
> > void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
> >
> > static inline bool intel_engine_has_waiter(const struct intel_engine_cs 
> > *engine)
> >
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Ta, pushed this one as it's been the only thing to give a clear
improvement to gem_exec_whisper + execlist in yonks :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to