Re: [PATCH 1/4] drm/panthor: Force an immediate reset on unrecoverable faults
On Thu, May 02, 2024 at 08:38:09PM +0200, Boris Brezillon wrote: > If the FW reports an unrecoverable fault, we need to reset the GPU > before we can start re-using it again. > > Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau > --- > drivers/gpu/drm/panthor/panthor_device.c | 1 + > drivers/gpu/drm/panthor/panthor_device.h | 1 + > drivers/gpu/drm/panthor/panthor_sched.c | 11 ++- > 3 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/panthor/panthor_device.c > b/drivers/gpu/drm/panthor/panthor_device.c > index 75276cbeba20..4c5b54e7abb7 100644 > --- a/drivers/gpu/drm/panthor/panthor_device.c > +++ b/drivers/gpu/drm/panthor/panthor_device.c > @@ -293,6 +293,7 @@ static const struct panthor_exception_info > panthor_exception_infos[] = { > PANTHOR_EXCEPTION(ACTIVE), > PANTHOR_EXCEPTION(CS_RES_TERM), > PANTHOR_EXCEPTION(CS_CONFIG_FAULT), > + PANTHOR_EXCEPTION(CS_UNRECOVERABLE), > PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), > PANTHOR_EXCEPTION(CS_BUS_FAULT), > PANTHOR_EXCEPTION(CS_INSTR_INVALID), > diff --git a/drivers/gpu/drm/panthor/panthor_device.h > b/drivers/gpu/drm/panthor/panthor_device.h > index 2fdd671b38fd..e388c0472ba7 100644 > --- a/drivers/gpu/drm/panthor/panthor_device.h > +++ b/drivers/gpu/drm/panthor/panthor_device.h > @@ -216,6 +216,7 @@ enum drm_panthor_exception_type { > DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, > DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, > + DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41, > DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, > DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, > DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c > b/drivers/gpu/drm/panthor/panthor_sched.c > index 7f16a4a14e9a..1d2708c3ab0a 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.c > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > @@ -1281,7 +1281,16 @@ cs_slot_process_fatal_event_locked(struct > panthor_device *ptdev, > 
if (group) > group->fatal_queues |= BIT(cs_id); > > - sched_queue_delayed_work(sched, tick, 0); > + if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) > { > + /* If this exception is unrecoverable, queue a reset, and make > + * sure we stop scheduling groups until the reset has happened. > + */ > + panthor_device_schedule_reset(ptdev); > + cancel_delayed_work(&sched->tick_work); > + } else { > + sched_queue_delayed_work(sched, tick, 0); > + } > + > drm_warn(&ptdev->base, >"CSG slot %d CS slot: %d\n" >"CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" > -- > 2.44.0 > -- | I would like to | | fix the world, | | but they're not | | giving me the | \ source code! / --- ¯\_(ツ)_/¯
Re: [PATCH 1/4] drm/panthor: Force an immediate reset on unrecoverable faults
On 02/05/2024 19:38, Boris Brezillon wrote: > If the FW reports an unrecoverable fault, we need to reset the GPU > before we can start re-using it again. > > Signed-off-by: Boris Brezillon Reviewed-by: Steven Price > --- > drivers/gpu/drm/panthor/panthor_device.c | 1 + > drivers/gpu/drm/panthor/panthor_device.h | 1 + > drivers/gpu/drm/panthor/panthor_sched.c | 11 ++- > 3 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/panthor/panthor_device.c > b/drivers/gpu/drm/panthor/panthor_device.c > index 75276cbeba20..4c5b54e7abb7 100644 > --- a/drivers/gpu/drm/panthor/panthor_device.c > +++ b/drivers/gpu/drm/panthor/panthor_device.c > @@ -293,6 +293,7 @@ static const struct panthor_exception_info > panthor_exception_infos[] = { > PANTHOR_EXCEPTION(ACTIVE), > PANTHOR_EXCEPTION(CS_RES_TERM), > PANTHOR_EXCEPTION(CS_CONFIG_FAULT), > + PANTHOR_EXCEPTION(CS_UNRECOVERABLE), > PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), > PANTHOR_EXCEPTION(CS_BUS_FAULT), > PANTHOR_EXCEPTION(CS_INSTR_INVALID), > diff --git a/drivers/gpu/drm/panthor/panthor_device.h > b/drivers/gpu/drm/panthor/panthor_device.h > index 2fdd671b38fd..e388c0472ba7 100644 > --- a/drivers/gpu/drm/panthor/panthor_device.h > +++ b/drivers/gpu/drm/panthor/panthor_device.h > @@ -216,6 +216,7 @@ enum drm_panthor_exception_type { > DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, > DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, > + DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41, > DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, > DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, > DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c > b/drivers/gpu/drm/panthor/panthor_sched.c > index 7f16a4a14e9a..1d2708c3ab0a 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.c > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > @@ -1281,7 +1281,16 @@ cs_slot_process_fatal_event_locked(struct > panthor_device *ptdev, > if (group) > 
group->fatal_queues |= BIT(cs_id); > > - sched_queue_delayed_work(sched, tick, 0); > + if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) > { > + /* If this exception is unrecoverable, queue a reset, and make > + * sure we stop scheduling groups until the reset has happened. > + */ > + panthor_device_schedule_reset(ptdev); > + cancel_delayed_work(&sched->tick_work); > + } else { > + sched_queue_delayed_work(sched, tick, 0); > + } > + > drm_warn(&ptdev->base, >"CSG slot %d CS slot: %d\n" >"CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
[PATCH 1/4] drm/panthor: Force an immediate reset on unrecoverable faults
If the FW reports an unrecoverable fault, we need to reset the GPU before we can start re-using it again. Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_device.c | 1 + drivers/gpu/drm/panthor/panthor_device.h | 1 + drivers/gpu/drm/panthor/panthor_sched.c | 11 ++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 75276cbeba20..4c5b54e7abb7 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -293,6 +293,7 @@ static const struct panthor_exception_info panthor_exception_infos[] = { PANTHOR_EXCEPTION(ACTIVE), PANTHOR_EXCEPTION(CS_RES_TERM), PANTHOR_EXCEPTION(CS_CONFIG_FAULT), + PANTHOR_EXCEPTION(CS_UNRECOVERABLE), PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), PANTHOR_EXCEPTION(CS_BUS_FAULT), PANTHOR_EXCEPTION(CS_INSTR_INVALID), diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h index 2fdd671b38fd..e388c0472ba7 100644 --- a/drivers/gpu/drm/panthor/panthor_device.h +++ b/drivers/gpu/drm/panthor/panthor_device.h @@ -216,6 +216,7 @@ enum drm_panthor_exception_type { DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, + DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41, DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 7f16a4a14e9a..1d2708c3ab0a 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -1281,7 +1281,16 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, if (group) group->fatal_queues |= BIT(cs_id); - sched_queue_delayed_work(sched, tick, 0); + if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) { + /* If this 
exception is unrecoverable, queue a reset, and make + * sure we stop scheduling groups until the reset has happened. + */ + panthor_device_schedule_reset(ptdev); + cancel_delayed_work(&sched->tick_work); + } else { + sched_queue_delayed_work(sched, tick, 0); + } + drm_warn(&ptdev->base, "CSG slot %d CS slot: %d\n" "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" -- 2.44.0