Chris Wilson <ch...@chris-wilson.co.uk> writes:

> Instead of synchronously cancelling the timer and re-enabling it inside
> the reset callbacks, keep the timer enabled and let it die on its next
> wakeup if no longer required. This allows
> intel_engine_reset_breadcrumbs() to be used from an atomic
> (timer/softirq) context such as required for resetting an engine.
>
> It also allows us to react better to the user poking around debugfs for
> testing missed irqs.
>
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 27 +++++++++++++++++-------
>  1 file changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
> b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 671a6d61e29d..0a2128c6b418 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -130,11 +130,12 @@ static void intel_breadcrumbs_hangcheck(struct 
> timer_list *t)
>  
>  static void intel_breadcrumbs_fake_irq(struct timer_list *t)
>  {
> -     struct intel_engine_cs *engine = from_timer(engine, t,
> -                                                 breadcrumbs.fake_irq);
> +     struct intel_engine_cs *engine =
> +             from_timer(engine, t, breadcrumbs.fake_irq);
>       struct intel_breadcrumbs *b = &engine->breadcrumbs;
>  
> -     /* The timer persists in case we cannot enable interrupts,
> +     /*
> +      * The timer persists in case we cannot enable interrupts,
>        * or if we have previously seen seqno/interrupt incoherency
>        * ("missed interrupt" syndrome, better known as a "missed breadcrumb").
>        * Here the worker will wake up every jiffie in order to kick the
> @@ -148,6 +149,12 @@ static void intel_breadcrumbs_fake_irq(struct timer_list 
> *t)
>       if (!b->irq_armed)
>               return;
>  
> +     /* If the user has disabled the fake-irq, restore the hangchecking */
> +     if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) {
> +             mod_timer(&b->hangcheck, wait_timeout());
> +             return;
> +     }
> +

Looking at the cancel_fake_irq() now, which we still need to keep as
sync, I think there is race introduce now as this can queue itself.

I think we want to also change the cancel_fake_irq() to do
the bit clearing first, not last after the del_timer_syncs().

-Mika

>       mod_timer(&b->fake_irq, jiffies + 1);
>  }
>  
> @@ -840,15 +847,22 @@ void intel_engine_reset_breadcrumbs(struct 
> intel_engine_cs *engine)
>  {
>       struct intel_breadcrumbs *b = &engine->breadcrumbs;
>  
> -     cancel_fake_irq(engine);
>       spin_lock_irq(&b->irq_lock);
>  
> +     /*
> +      * Leave the fake_irq timer enabled (if it is running), but clear the
> +      * bit so that it turns itself off on its next wake up and goes back
> +      * to the long hangcheck interval if still required.
> +      */
> +     clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
> +
>       if (b->irq_enabled)
>               irq_enable(engine);
>       else
>               irq_disable(engine);
>  
> -     /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
> +     /*
> +      * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
>        * GPU is active and may have already executed the MI_USER_INTERRUPT
>        * before the CPU is ready to receive. However, the engine is currently
>        * idle (we haven't started it yet), there is no possibility for a
> @@ -857,9 +871,6 @@ void intel_engine_reset_breadcrumbs(struct 
> intel_engine_cs *engine)
>        */
>       clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>  
> -     if (b->irq_armed)
> -             enable_fake_irq(b);
> -
>       spin_unlock_irq(&b->irq_lock);
>  }
>  
> -- 
> 2.17.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to