On Wed, Apr 26, 2023 at 04:57:07PM -0400, Rodrigo Vivi wrote:
> The goal is to allow a snapshot to be captured at the time of the
> crash, while the printout can happen at a later time through the
> exposed devcoredump virtual device.
> 
> Signed-off-by: Rodrigo Vivi <rodrigo.v...@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc_submit.c       | 212 +++++++++++++++++++----
>  drivers/gpu/drm/xe/xe_guc_submit.h       |  10 +-
>  drivers/gpu/drm/xe/xe_guc_submit_types.h |  91 ++++++++++
>  3 files changed, 274 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index a5fe7755ce4c..9c06411f857f 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -1596,75 +1596,211 @@ int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
>       return 0;
>  }
>  
> -static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
> +static void
> +guc_engine_wq_snapshot_capture(struct xe_engine *e,
> +                            struct xe_guc_submit_engine_snapshot *snapshot)
>  {
>       struct xe_guc *guc = engine_to_guc(e);
>       struct xe_device *xe = guc_to_xe(guc);
>       struct iosys_map map = xe_lrc_parallel_map(e->lrc);
>       int i;
>  
> +     snapshot->guc.wqi_head = e->guc->wqi_head;
> +     snapshot->guc.wqi_tail = e->guc->wqi_tail;
> +     snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
> +     snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
> +     snapshot->parallel.wq_desc.status = parallel_read(xe, map,
> +                                                       wq_desc.wq_status);
> +
> +     if (snapshot->parallel.wq_desc.head !=
> +         snapshot->parallel.wq_desc.tail) {
> +             for (i = snapshot->parallel.wq_desc.head;
> +                  i != snapshot->parallel.wq_desc.tail;
> +                  i = (i + sizeof(u32)) % WQ_SIZE)
> +                     snapshot->parallel.wq[i / sizeof(u32)] =
> +                             parallel_read(xe, map, wq[i / sizeof(u32)]);
> +     }
> +}
> +
> +static void
> +guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
> +                          struct drm_printer *p)
> +{
> +     int i;
> +
>       drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
> -                e->guc->wqi_head, parallel_read(xe, map, wq_desc.head));
> +                snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
>       drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
> -                e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail));
> -     drm_printf(p, "\tWQ status: %u\n",
> -                parallel_read(xe, map, wq_desc.wq_status));
> -     if (parallel_read(xe, map, wq_desc.head) !=
> -         parallel_read(xe, map, wq_desc.tail)) {
> -             for (i = parallel_read(xe, map, wq_desc.head);
> -                  i != parallel_read(xe, map, wq_desc.tail);
> +                snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
> +     drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
> +
> +     if (snapshot->parallel.wq_desc.head !=
> +         snapshot->parallel.wq_desc.tail) {
> +             for (i = snapshot->parallel.wq_desc.head;
> +                  i != snapshot->parallel.wq_desc.tail;
>                    i = (i + sizeof(u32)) % WQ_SIZE)
>                       drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
> -                                parallel_read(xe, map, wq[i / sizeof(u32)]));
> +                                snapshot->parallel.wq[i / sizeof(u32)]);
>       }
>  }
>  
> -static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
> +/**
> + * xe_guc_engine_snapshot_capture - Take a quick snapshot of the GuC Engine.
> + * @e: Xe Engine.
> + *
> + * This snapshot can be printed out at a later stage, e.g. during a
> + * devcoredump analysis.
> + *
> + * Returns: a GuC Submit Engine snapshot object that must be freed by the
> + * caller, using xe_guc_engine_snapshot_free().
> + */
> +struct xe_guc_submit_engine_snapshot *
> +xe_guc_engine_snapshot_capture(struct xe_engine *e)
>  {
>       struct drm_gpu_scheduler *sched = &e->guc->sched;
>       struct xe_sched_job *job;
> +     struct xe_guc_submit_engine_snapshot *snapshot;
>       int i;
>  
> -     drm_printf(p, "\nGuC ID: %d\n", e->guc->id);
> -     drm_printf(p, "\tName: %s\n", e->name);
> -     drm_printf(p, "\tClass: %d\n", e->class);
> -     drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask);
> -     drm_printf(p, "\tWidth: %d\n", e->width);
> -     drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount));
> -     drm_printf(p, "\tTimeout: %ld (ms)\n", sched->timeout);
> -     drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us);
> -     drm_printf(p, "\tPreempt timeout: %u (us)\n",
> -                e->sched_props.preempt_timeout_us);
> +     snapshot = kzalloc(sizeof(struct xe_guc_submit_engine_snapshot),
> +                        GFP_ATOMIC);

Throughout this file you need to check whether any of the allocs fail.
Also, if only a later alloc fails (say, the last one), I wouldn't fail
the entire capture, just abort at that point (i.e. some info is better
than none).
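
Something like this, maybe (a completely untested sketch; with early
aborts like these, xe_guc_engine_snapshot_print() would also need to
tolerate a partial snapshot, e.g. a NULL lrc, while kfree(NULL) being
a no-op already keeps the free side safe):

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
	if (!snapshot)
		return NULL;

	...

	snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot),
				      GFP_ATOMIC);
	if (!snapshot->lrc)
		return snapshot;	/* some info is better than none */

	...

	spin_lock(&sched->job_list_lock);
	snapshot->pending_list_size = list_count_nodes(&sched->pending_list);
	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
					       sizeof(struct pending_list_snapshot),
					       GFP_ATOMIC);
	if (!snapshot->pending_list) {
		/* keep the print loop from walking a NULL array */
		snapshot->pending_list_size = 0;
		spin_unlock(&sched->job_list_lock);
		return snapshot;
	}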

Matt

> +
> +     snapshot->guc.id = e->guc->id;
> +     memcpy(&snapshot->name, &e->name, sizeof(snapshot->name));
> +     snapshot->class = e->class;
> +     snapshot->logical_mask = e->logical_mask;
> +     snapshot->width = e->width;
> +     snapshot->refcount = kref_read(&e->refcount);
> +     snapshot->sched_timeout = sched->timeout;
> +     snapshot->sched_props.timeslice_us = e->sched_props.timeslice_us;
> +     snapshot->sched_props.preempt_timeout_us =
> +             e->sched_props.preempt_timeout_us;
> +
> +     snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot),
> +                             GFP_ATOMIC);
> +
>       for (i = 0; i < e->width; ++i ) {
>               struct xe_lrc *lrc = e->lrc + i;
>  
> +             snapshot->lrc[i].context_desc =
> +                     lower_32_bits(xe_lrc_ggtt_addr(lrc));
> +             snapshot->lrc[i].head = xe_lrc_ring_head(lrc);
> +             snapshot->lrc[i].tail.internal = lrc->ring.tail;
> +             snapshot->lrc[i].tail.memory =
> +                     xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
> +             snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc);
> +             snapshot->lrc[i].seqno = xe_lrc_seqno(lrc);
> +
> +     }
> +
> +     snapshot->schedule_state = atomic_read(&e->guc->state);
> +     snapshot->engine_flags = e->flags;
> +
> +     snapshot->parallel_execution = xe_engine_is_parallel(e);
> +     if (snapshot->parallel_execution)
> +             guc_engine_wq_snapshot_capture(e, snapshot);
> +
> +     spin_lock(&sched->job_list_lock);
> +     snapshot->pending_list_size = list_count_nodes(&sched->pending_list);
> +     snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
> +                                      sizeof(struct pending_list_snapshot),
> +                                      GFP_ATOMIC);
> +     i = 0;
> +     list_for_each_entry(job, &sched->pending_list, drm.list) {
> +             snapshot->pending_list[i].seqno = xe_sched_job_seqno(job);
> +             snapshot->pending_list[i].fence =
> +                     dma_fence_is_signaled(job->fence) ? 1 : 0;
> +             snapshot->pending_list[i].finished =
> +                     dma_fence_is_signaled(&job->drm.s_fence->finished)
> +                     ? 1 : 0;
> +             i++;
> +     }
> +     spin_unlock(&sched->job_list_lock);
> +
> +     return snapshot;
> +}
> +
> +/**
> + * xe_guc_engine_snapshot_print - Print out a given GuC Engine snapshot.
> + * @snapshot: GuC Submit Engine snapshot object.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function prints out a given GuC Submit Engine snapshot object.
> + */
> +void
> +xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
> +                          struct drm_printer *p)
> +{
> +     int i;
> +
> +     drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
> +     drm_printf(p, "\tName: %s\n", snapshot->name);
> +     drm_printf(p, "\tClass: %d\n", snapshot->class);
> +     drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
> +     drm_printf(p, "\tWidth: %d\n", snapshot->width);
> +     drm_printf(p, "\tRef: %d\n", snapshot->refcount);
> +     drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
> +     drm_printf(p, "\tTimeslice: %u (us)\n",
> +                snapshot->sched_props.timeslice_us);
> +     drm_printf(p, "\tPreempt timeout: %u (us)\n",
> +                snapshot->sched_props.preempt_timeout_us);
> +
> +     for (i = 0; i < snapshot->width; ++i) {
>               drm_printf(p, "\tHW Context Desc: 0x%08x\n",
> -                        lower_32_bits(xe_lrc_ggtt_addr(lrc)));
> +                        snapshot->lrc[i].context_desc);
>               drm_printf(p, "\tLRC Head: (memory) %u\n",
> -                        xe_lrc_ring_head(lrc));
> +                        snapshot->lrc[i].head);
>               drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
> -                        lrc->ring.tail,
> -                        xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL));
> +                        snapshot->lrc[i].tail.internal,
> +                        snapshot->lrc[i].tail.memory);
>               drm_printf(p, "\tStart seqno: (memory) %d\n",
> -                        xe_lrc_start_seqno(lrc));
> -             drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc));
> +                        snapshot->lrc[i].start_seqno);
> +             drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
>       }
> -     drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state));
> -     drm_printf(p, "\tFlags: 0x%lx\n", e->flags);
> -     if (xe_engine_is_parallel(e))
> -             guc_engine_wq_print(e, p);
> +     drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
> +     drm_printf(p, "\tFlags: 0x%lx\n", snapshot->engine_flags);
>  
> -     spin_lock(&sched->job_list_lock);
> +     if (snapshot->parallel_execution)
> +             guc_engine_wq_snapshot_print(snapshot, p);
>  
> -     list_for_each_entry(job, &sched->pending_list, drm.list)
> +     for (i = 0; i < snapshot->pending_list_size; i++)
>               drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
> -                        xe_sched_job_seqno(job),
> -                        dma_fence_is_signaled(job->fence) ? 1 : 0,
> -                        dma_fence_is_signaled(&job->drm.s_fence->finished) ?
> -                        1 : 0);
> -     spin_unlock(&sched->job_list_lock);
> +                        snapshot->pending_list[i].seqno,
> +                        snapshot->pending_list[i].fence,
> +                        snapshot->pending_list[i].finished);
> +}
> +
> +/**
> + * xe_guc_engine_snapshot_free - Free all allocated objects for a given
> + * snapshot.
> + * @snapshot: GuC Submit Engine snapshot object.
> + *
> + * This function frees all the memory that was allocated at capture time.
> + */
> +void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot)
> +{
> +     kfree(snapshot->lrc);
> +     kfree(snapshot->pending_list);
> +     kfree(snapshot);
> +}
> +
> +static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
> +{
> +     struct xe_guc_submit_engine_snapshot *snapshot;
> +
> +     snapshot = xe_guc_engine_snapshot_capture(e);
> +     xe_guc_engine_snapshot_print(snapshot, p);
> +     xe_guc_engine_snapshot_free(snapshot);
>  }
>  
> +/**
> + * xe_guc_submit_print - GuC Submit Print.
> + * @guc: GuC.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function captures and prints snapshots of **all** GuC Engines.
> + */
>  void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
>  {
>       struct xe_engine *e;
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
> index 8002734d6f24..4153c2d22013 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.h
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.h
> @@ -13,7 +13,6 @@ struct xe_engine;
>  struct xe_guc;
>  
>  int xe_guc_submit_init(struct xe_guc *guc);
> -void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
>  
>  int xe_guc_submit_reset_prepare(struct xe_guc *guc);
>  void xe_guc_submit_reset_wait(struct xe_guc *guc);
> @@ -27,4 +26,13 @@ int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
>                                          u32 len);
>  int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
>  
> +struct xe_guc_submit_engine_snapshot *
> +xe_guc_engine_snapshot_capture(struct xe_engine *e);
> +void
> +xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
> +                          struct drm_printer *p);
> +void
> +xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot);
> +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
> +
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
> index d369ea0bad60..0b726609dc14 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
> @@ -61,4 +61,95 @@ struct guc_submit_parallel_scratch {
>       u32 wq[WQ_SIZE / sizeof(u32)];
>  };
>  
> +struct lrc_snapshot {
> +     u32 context_desc;
> +     u32 head;
> +     struct {
> +             u32 internal;
> +             u32 memory;
> +     } tail;
> +     u32 start_seqno;
> +     u32 seqno;
> +};
> +
> +struct pending_list_snapshot {
> +     u32 seqno;
> +     bool fence;
> +     bool finished;
> +};
> +
> +/**
> + * struct xe_guc_submit_engine_snapshot - Snapshot for devcoredump
> + */
> +struct xe_guc_submit_engine_snapshot {
> +     /** @name: name of this engine */
> +     char name[MAX_FENCE_NAME_LEN];
> +     /** @class: class of this engine */
> +     enum xe_engine_class class;
> +     /**
> +      * @logical_mask: logical mask of where job submitted to engine can run
> +      */
> +     u32 logical_mask;
> +     /** @width: width (number of BBs submitted per exec) of this engine */
> +     u16 width;
> +     /** @refcount: ref count of this engine */
> +     u32 refcount;
> +     /**
> +      * @sched_timeout: the time after which a job is removed from the
> +      * scheduler.
> +      */
> +     long sched_timeout;
> +
> +     /** @sched_props: scheduling properties */
> +     struct {
> +             /** @timeslice_us: timeslice period in micro-seconds */
> +             u32 timeslice_us;
> +             /** @preempt_timeout_us: preemption timeout in micro-seconds */
> +             u32 preempt_timeout_us;
> +     } sched_props;
> +
> +     /** @lrc: LRC Snapshot */
> +     struct lrc_snapshot *lrc;
> +
> +     /** @schedule_state: Schedule State at the moment of the Crash */
> +     u32 schedule_state;
> +     /** @engine_flags: Flags of the faulty engine */
> +     unsigned long engine_flags;
> +
> +     /** @guc: GuC Engine Snapshot */
> +     struct {
> +             /** @wqi_head: work queue item head */
> +             u32 wqi_head;
> +             /** @wqi_tail: work queue item tail */
> +             u32 wqi_tail;
> +             /** @id: GuC id for this xe_engine */
> +             u16 id;
> +     } guc;
> +
> +     /**
> +      * @parallel_execution: Indicates whether the failure happened during
> +      * parallel execution
> +      */
> +     bool parallel_execution;
> +     /** @parallel: snapshot of the useful parallel scratch */
> +     struct {
> +             /** @wq_desc: Workqueue description */
> +             struct {
> +                     /** @head: Workqueue Head */
> +                     u32 head;
> +                     /** @tail: Workqueue Tail */
> +                     u32 tail;
> +                     /** @status: Workqueue Status */
> +                     u32 status;
> +             } wq_desc;
> +             /** @wq: Workqueue Items */
> +             u32 wq[WQ_SIZE / sizeof(u32)];
> +     } parallel;
> +
> +     /** @pending_list_size: Size of the pending list snapshot array */
> +     int pending_list_size;
> +     /** @pending_list: snapshot of the pending list info */
> +     struct pending_list_snapshot *pending_list;
> +};
> +
>  #endif
> -- 
> 2.39.2
> 