Chris Wilson <ch...@chris-wilson.co.uk> writes:

> If we receive the error interrupt before the CS interrupt, we may find
> ourselves without an active request to reset, skipping the GPU reset.
> All because the attempt to reset was too early.
>

With the tracing, we will see the out-of-sync situations,
so

Reviewed-by: Mika Kuoppala <mika.kuopp...@linux.intel.com>


> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 41 ++++++++++++++++++++++++++++-
>  1 file changed, 40 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 3479cda37fdc..f028114714cd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2804,6 +2804,45 @@ static struct execlists_capture *capture_regs(struct 
> intel_engine_cs *engine)
>       return NULL;
>  }
>  
> +static struct i915_request *
> +active_context(struct intel_engine_cs *engine, u32 ccid)
> +{
> +     const struct intel_engine_execlists * const el = &engine->execlists;
> +     struct i915_request * const *port, *rq;
> +
> +     /*
> +      * Use the most recent result from process_csb(), but just in case
> +      * we trigger an error (via interrupt) before the first CS event has
> +      * been written, peek at the next submission.
> +      */
> +
> +     for (port = el->active; (rq = *port); port++) {
> +             if (upper_32_bits(rq->context->lrc_desc) == ccid) {
> +                     ENGINE_TRACE(engine,
> +                                  "ccid found at active:%zd\n",
> +                                  port - el->active);
> +                     return rq;
> +             }
> +     }
> +
> +     for (port = el->pending; (rq = *port); port++) {
> +             if (upper_32_bits(rq->context->lrc_desc) == ccid) {
> +                     ENGINE_TRACE(engine,
> +                                  "ccid found at pending:%zd\n",
> +                                  port - el->pending);
> +                     return rq;
> +             }
> +     }
> +
> +     ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
> +     return NULL;
> +}
> +
> +static u32 active_ccid(struct intel_engine_cs *engine)
> +{
> +     return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
> +}
> +
>  static bool execlists_capture(struct intel_engine_cs *engine)
>  {
>       struct execlists_capture *cap;
> @@ -2821,7 +2860,7 @@ static bool execlists_capture(struct intel_engine_cs 
> *engine)
>               return true;
>  
>       spin_lock_irq(&engine->active.lock);
> -     cap->rq = execlists_active(&engine->execlists);
> +     cap->rq = active_context(engine, active_ccid(engine));
>       if (cap->rq) {
>               cap->rq = active_request(cap->rq->context->timeline, cap->rq);
>               cap->rq = i915_request_get_rcu(cap->rq);
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to