We dropped calling process_csb() prior to handling direct submission in order to avoid nesting spinlocks and to lift process_csb() and the majority of the tasklet out of irq-off context. However, we do want to avoid ksoftirqd latency in the fast path, so try to pull the interrupt-bh local to direct submission if we can acquire the tasklet's lock.
v2: Tweak the balance to avoid over submitting lite-restores Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Francisco Jerez <curroje...@riseup.net> Cc: Tvrtko Ursulin <tvrtko.ursu...@linux.intel.com> --- drivers/gpu/drm/i915/gt/intel_lrc.c | 40 +++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f09dd87324b9..0d9c6ea4adaa 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2884,29 +2884,47 @@ static void queue_request(struct intel_engine_cs *engine, set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } -static void __submit_queue_imm(struct intel_engine_cs *engine) +static bool pending_csb(const struct intel_engine_execlists *el) { - struct intel_engine_execlists * const execlists = &engine->execlists; - - if (reset_in_progress(execlists)) - return; /* defer until we restart the engine following reset */ - - if (execlists->tasklet.func == execlists_submission_tasklet) - __execlists_submission_tasklet(engine); - else - tasklet_hi_schedule(&execlists->tasklet); + return READ_ONCE(*el->csb_write) != READ_ONCE(el->csb_head); } static void submit_queue(struct intel_engine_cs *engine, const struct i915_request *rq) { struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_request *inflight; if (rq_prio(rq) <= execlists->queue_priority_hint) return; + if (reset_in_progress(execlists)) + return; /* defer until we restart the engine following reset */ + + /* Hopefully we clear execlists->pending[] to let us through */ + if (execlists->pending[0] && tasklet_trylock(&execlists->tasklet)) { + process_csb(engine); + tasklet_unlock(&execlists->tasklet); + } + + /* + * Suppress immediate lite-restores, leave that to the tasklet. + * + * However, we leave the queue_priority_hint unset so that if we do + * submit a second context, we push that into ELSP[1] immediately. 
+ */ + inflight = execlists_active(&engine->execlists); + if (inflight && inflight->context == rq->context) + return; + execlists->queue_priority_hint = rq_prio(rq); - __submit_queue_imm(engine); + __execlists_submission_tasklet(engine); + + /* Try and pull an interrupt-bh queued on another CPU to here */ + if (pending_csb(execlists) && tasklet_trylock(&execlists->tasklet)) { + process_csb(engine); + tasklet_unlock(&execlists->tasklet); + } } static bool ancestor_on_hold(const struct intel_engine_cs *engine, -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx