On Mon, 15 Dec 2025 11:54:53 +0000
Lukas Zapolskas <[email protected]> wrote:

> From: Paul Toadere <[email protected]>
> 
> Though faulted queues do not prevent further submission, the
> recoverable faults may have further consequences which are
> worth recording and providing to the user.
> 
> Signed-off-by: Paul Toadere <[email protected]>
> Co-developed-by: Lukas Zapolskas <[email protected]>
> Signed-off-by: Lukas Zapolskas <[email protected]>
> ---
>  drivers/gpu/drm/panthor/panthor_sched.c | 18 +++++++++++++++---
>  include/uapi/drm/panthor_drm.h          | 11 +++++++++--
>  2 files changed, 24 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index a17b067a0439..eb8841beba39 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -569,6 +569,14 @@ struct panthor_group {
>       /** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
>       u32 fatal_queues;
>  
> +     /**
> +      * @fault_queues: Bitmask reflecting the queues that hit a recoverable exception.
> +      *
> +      * This field is reset when the GROUP_GET_STATE ioctl is used to collect the fault
> +      * information.
> +      */
> +     u32 fault_queues;

s/fault_queues/faulty_queues/ ?

> +
>       /** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
>       atomic_t tiler_oom;
>  
> @@ -1553,6 +1561,8 @@ cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
>       if (group) {
>               drm_warn(&ptdev->base, "CS_FAULT: pid=%d, comm=%s\n",
>                        group->task_info.pid, group->task_info.comm);
> +
> +             group->fault_queues |= BIT(cs_id);
>       }
>  
>       drm_warn(&ptdev->base,
> @@ -3807,9 +3817,6 @@ int panthor_group_get_state(struct panthor_file *pfile,
>       struct panthor_scheduler *sched = ptdev->scheduler;
>       struct panthor_group *group;
>  
> -     if (get_state->pad)
> -             return -EINVAL;
> -
>       group = group_from_handle(gpool, get_state->group_handle);
>       if (!group)
>               return -EINVAL;
> @@ -3825,6 +3832,11 @@ int panthor_group_get_state(struct panthor_file *pfile,
>       }
>       if (group->innocent)
>               get_state->state |= DRM_PANTHOR_GROUP_STATE_INNOCENT;
> +     if (group->fault_queues) {
> +             get_state->state |= DRM_PANTHOR_GROUP_STATE_QUEUE_FAULT;
> +             get_state->fault_queues = group->fault_queues;
> +             group->fault_queues = 0;
> +     }
>       mutex_unlock(&sched->lock);
>  
>       group_put(group);
> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> index e238c6264fa1..77262d2b9672 100644
> --- a/include/uapi/drm/panthor_drm.h
> +++ b/include/uapi/drm/panthor_drm.h
> @@ -965,6 +965,13 @@ enum drm_panthor_group_state_flags {
>        * DRM_PANTHOR_GROUP_STATE_FATAL_FAULT is not.
>        */
>       DRM_PANTHOR_GROUP_STATE_INNOCENT = 1 << 2,
> +
> +     /**
> +      * @DRM_PANTHOR_GROUP_STATE_QUEUE_FAULT: Group had recoverable faults.
> +      *
> +      * When a group ends up with this flag set, jobs can still be submitted to its queues.
> +      */
> +     DRM_PANTHOR_GROUP_STATE_QUEUE_FAULT = 1 << 3,
>  };
>  
>  /**
> @@ -986,8 +993,8 @@ struct drm_panthor_group_get_state {
>       /** @fatal_queues: Bitmask of queues that faced fatal faults. */
>       __u32 fatal_queues;
>  
> -     /** @pad: MBZ */
> -     __u32 pad;
> +     /** @fatal_queues: Bitmask of queues that faced fatal faults. */

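s/fatal/recoverable/ in the description, and the kernel-doc tag should be
@fault_queues (not @fatal_queues) so it matches the new field name.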

> +     __u32 fault_queues;
>  };
>  
>  /**

Reply via email to