On 5/20/26 08:38, Prike Liang wrote:
> From: Pierre-Eric Pelloux-Prayer <[email protected]>
> 
> Add ftrace events for tracking the userq fence emit, signal
> and queue state transition.

The queue trace points look good to me, but clear NAK to the fence trace points;
those just duplicate the common trace points in the dma_fence framework.

Regards,
Christian.

> 
> Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
> Signed-off-by: Prike Liang <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h     | 129 ++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     |  21 +++
>  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   |  13 +-
>  3 files changed, 160 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index 4ff8a4d7bb8b..32d8c36caaf3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -28,6 +28,8 @@
>  #include <linux/types.h>
>  #include <linux/tracepoint.h>
>  
> +#include "amdgpu_userq_fence.h"
> +
>  #undef TRACE_SYSTEM
>  #define TRACE_SYSTEM amdgpu
>  #define TRACE_INCLUDE_FILE amdgpu_trace
> @@ -659,6 +661,133 @@ DEFINE_EVENT(amdgpu_userq_eviction_fence, 
> amdgpu_userq_eviction_fence_enable_sig
>  DEFINE_EVENT(amdgpu_userq_eviction_fence, amdgpu_userq_eviction_fence_signal,
>           TP_PROTO(u64 context, u64 seqno),
>           TP_ARGS(context, seqno));
> +TRACE_EVENT(amdgpu_userq_job_run,
> +         TP_PROTO(struct device *device, struct amdgpu_usermode_queue 
> *queue, struct amdgpu_userq_fence *fence),
> +         TP_ARGS(device, queue, fence),
> +         TP_STRUCT__entry(
> +                          __field(u64, fence_context)
> +                          __field(u64, fence_seqno)
> +                          __string(dev, dev_name(device))
> +                          __field(u64, doorbell_index)
> +                          __field(u64, client_id)
> +                          __field(u32, queue_type)
> +                          ),
> +         TP_fast_assign(
> +                        __entry->fence_context = fence->base.context;
> +                        __entry->fence_seqno = fence->base.seqno;
> +                        __assign_str(dev);
> +                        __entry->doorbell_index = queue->doorbell_index;
> +                        __entry->client_id = 
> queue->userq_mgr->file->client_id;
> +                        __entry->queue_type = queue->queue_type;
> +                        ),
> +         TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu, 
> fence=%llu:%llu",
> +                   __get_str(dev), __entry->client_id, __entry->queue_type, 
> __entry->doorbell_index,
> +                   __entry->fence_context,
> +                   __entry->fence_seqno)
> +);
> +
> +TRACE_EVENT(amdgpu_userq_job_done,
> +         TP_PROTO(struct amdgpu_userq_fence *fence),
> +         TP_ARGS(fence),
> +         TP_STRUCT__entry(
> +                          __field(u64, fence_context)
> +                          __field(u64, fence_seqno)
> +                          ),
> +         TP_fast_assign(
> +                        __entry->fence_context = fence->base.context;
> +                        __entry->fence_seqno = fence->base.seqno;
> +                        ),
> +         TP_printk("fence=%llu:%llu",
> +                   __entry->fence_context,
> +                   __entry->fence_seqno)
> +);
> +
> +TRACE_EVENT(amdgpu_userq_job_queue,
> +         TP_PROTO(struct device *device,
> +                  struct amdgpu_usermode_queue *queue),
> +         TP_ARGS(device, queue),
> +         TP_STRUCT__entry(__field(u64, context)
> +                          __string(dev, dev_name(device))
> +                          __field(u64, doorbell_index)
> +                          __field(u64, client_id)
> +                          __field(u32, queue_type)
> +                          ),
> +         TP_fast_assign(__assign_str(dev);
> +                        __entry->doorbell_index = queue->doorbell_index;
> +                        __entry->queue_type = queue->queue_type;
> +                        __entry->client_id = 
> queue->userq_mgr->file->client_id;
> +                        __entry->context = queue->fence_drv->context;
> +                       ),
> +         TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu, 
> context=%llu",
> +                   __get_str(dev), __entry->client_id, __entry->queue_type,
> +                   __entry->doorbell_index, __entry->context)
> +);
> +
> +TRACE_EVENT(amdgpu_userq_job_add_dep,
> +         TP_PROTO(struct device *device, struct amdgpu_usermode_queue 
> *queue, struct amdgpu_userq_fence *dep),
> +         TP_ARGS(device, queue, dep),
> +         TP_STRUCT__entry(
> +                          __field(u64, context)
> +                          __field(u64, dep_context)
> +                          __field(u64, dep_seqno)
> +                          __string(dev, dev_name(device))
> +                          __field(u64, doorbell_index)
> +                          __field(u64, client_id)
> +                          __field(u32, queue_type)
> +                          ),
> +         TP_fast_assign(
> +                        __assign_str(dev);
> +                        __entry->doorbell_index = queue->doorbell_index;
> +                        __entry->queue_type = queue->queue_type;
> +                        __entry->client_id = 
> queue->userq_mgr->file->client_id;
> +                        __entry->context = queue->fence_drv->context;
> +                        __entry->dep_context = dep->base.context;
> +                        __entry->dep_seqno = dep->base.seqno;
> +                        ),
> +         TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu, 
> context=%llu depends on fence=%llu:%llu",
> +                   __get_str(dev), __entry->client_id, __entry->queue_type, 
> __entry->doorbell_index, __entry->context,
> +                   __entry->dep_context,
> +                   __entry->dep_seqno)
> +);
> +
> +TRACE_EVENT(amdgpu_userq_state_start,
> +         TP_PROTO(struct amdgpu_usermode_queue *queue),
> +         TP_ARGS(queue),
> +         TP_STRUCT__entry(
> +                          __field(u64, doorbell_index)
> +                          __field(u64, client_id)
> +                          __field(u32, queue_type)
> +                          __field(u32, from)
> +                          ),
> +         TP_fast_assign(
> +                        __entry->doorbell_index = queue->doorbell_index;
> +                        __entry->queue_type = queue->queue_type;
> +                        __entry->client_id = 
> queue->userq_mgr->file->client_id;
> +                        __entry->from = queue->state;
> +                        ),
> +         TP_printk("client_id=%llu, type=%u, doorbell=%llu, from=%d",
> +                   __entry->client_id, __entry->queue_type, 
> __entry->doorbell_index, __entry->from)
> +);
> +
> +TRACE_EVENT(amdgpu_userq_state_changed,
> +         TP_PROTO(struct amdgpu_usermode_queue *queue, enum 
> amdgpu_userq_state new_state),
> +         TP_ARGS(queue, new_state),
> +         TP_STRUCT__entry(
> +                          __field(u64, doorbell_index)
> +                          __field(u64, client_id)
> +                          __field(u32, queue_type)
> +                          __field(u32, to)
> +                          ),
> +         TP_fast_assign(
> +                        __entry->doorbell_index = queue->doorbell_index;
> +                        __entry->queue_type = queue->queue_type;
> +                        __entry->client_id = 
> queue->userq_mgr->file->client_id;
> +                        __entry->to = new_state;
> +                        ),
> +         TP_printk("client_id=%llu, type=%u, doorbell=%llu, to=%d",
> +                   __entry->client_id, __entry->queue_type, 
> __entry->doorbell_index, __entry->to)
> +);
> +
>  #undef AMDGPU_JOB_GET_TIMELINE_NAME
>  #endif
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index e27f9a76f986..60d1186af286 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -329,11 +329,15 @@ static int amdgpu_userq_preempt_helper(struct 
> amdgpu_usermode_queue *queue)
>       int r;
>  
>       if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
> +             trace_amdgpu_userq_state_start(queue);
> +
>               r = userq_funcs->preempt(queue);
>               if (r) {
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_HUNG);
>                       queue->state = AMDGPU_USERQ_STATE_HUNG;
>                       return r;
>               } else {
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_PREEMPTED);
>                       queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
>               }
>       }
> @@ -349,10 +353,14 @@ static int amdgpu_userq_restore_helper(struct 
> amdgpu_usermode_queue *queue)
>       int r = 0;
>  
>       if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
> +             trace_amdgpu_userq_state_start(queue);
> +
>               r = userq_funcs->restore(queue);
>               if (r) {
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_HUNG);
>                       queue->state = AMDGPU_USERQ_STATE_HUNG;
>               } else {
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_MAPPED);
>                       queue->state = AMDGPU_USERQ_STATE_MAPPED;
>               }
>       }
> @@ -370,12 +378,15 @@ static int amdgpu_userq_unmap_helper(struct 
> amdgpu_usermode_queue *queue)
>  
>       if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
>           (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
> +             trace_amdgpu_userq_state_start(queue);
>  
>               r = userq_funcs->unmap(queue);
>               if (r) {
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_HUNG);
>                       queue->state = AMDGPU_USERQ_STATE_HUNG;
>                       return r;
>               } else {
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_UNMAPPED);
>                       queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
>               }
>       }
> @@ -392,11 +403,15 @@ static int amdgpu_userq_map_helper(struct 
> amdgpu_usermode_queue *queue)
>       int r;
>  
>       if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
> +             trace_amdgpu_userq_state_start(queue);
> +
>               r = userq_funcs->map(queue);
>               if (r) {
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_HUNG);
>                       queue->state = AMDGPU_USERQ_STATE_HUNG;
>                       return r;
>               } else {
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_MAPPED);
>                       queue->state = AMDGPU_USERQ_STATE_MAPPED;
>               }
>       }
> @@ -1007,6 +1022,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr 
> *uq_mgr)
>               if (!amdgpu_userq_buffer_vas_mapped(queue)) {
>                       drm_file_err(uq_mgr->file,
>                                    "trying restore queue without va 
> mapping\n");
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_INVALID_VA);
>                       queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
>                       continue;
>               }
> @@ -1502,12 +1518,14 @@ void amdgpu_userq_pre_reset(struct amdgpu_device 
> *adev)
>               if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
>                       continue;
>  
> +             trace_amdgpu_userq_state_start(queue);
>               userq_funcs = adev->userq_funcs[queue->queue_type];
>               userq_funcs->unmap(queue);
>               /* just mark all queues as hung at this point.
>                * if unmap succeeds, we could map again
>                * in amdgpu_userq_post_reset() if vram is not lost
>                */
> +             trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_HUNG);
>               queue->state = AMDGPU_USERQ_STATE_HUNG;
>               amdgpu_userq_fence_driver_force_completion(queue);
>       }
> @@ -1526,6 +1544,8 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev, 
> bool vram_lost)
>  
>       xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
>               if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) {
> +                     trace_amdgpu_userq_state_start(queue);
> +
>                       userq_funcs = adev->userq_funcs[queue->queue_type];
>                       /* Re-map queue */
>                       r = userq_funcs->map(queue);
> @@ -1533,6 +1553,7 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev, 
> bool vram_lost)
>                               dev_err(adev->dev, "Failed to remap queue 
> %ld\n", queue_id);
>                               continue;
>                       }
> +                     trace_amdgpu_userq_state_changed(queue, 
> AMDGPU_USERQ_STATE_MAPPED);
>                       queue->state = AMDGPU_USERQ_STATE_MAPPED;
>               }
>       }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 008330a0d852..00cc7194321c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -30,7 +30,7 @@
>  #include <drm/drm_syncobj.h>
>  
>  #include "amdgpu.h"
> -#include "amdgpu_userq_fence.h"
> +#include "amdgpu_trace.h"
>  
>  #define AMDGPU_USERQ_MAX_HANDLES     (1U << 16)
>  
> @@ -169,6 +169,7 @@ amdgpu_userq_fence_driver_process(struct 
> amdgpu_userq_fence_driver *fence_drv)
>               fence = &userq_fence->base;
>               list_del_init(&userq_fence->link);
>               dma_fence_signal(fence);
> +             trace_amdgpu_userq_job_done(userq_fence);
>               /* Drop fence_drv_array outside fence_list_lock
>                * to avoid the recursion lock.
>                */
> @@ -528,6 +529,8 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, 
> void *data,
>       /* Create the new fence */
>       amdgpu_userq_fence_init(queue, fence, wptr);
>  
> +     trace_amdgpu_userq_job_run(dev->dev, queue, fence);
> +
>       mutex_unlock(&userq_mgr->userq_mutex);
>  
>       /*
> @@ -701,7 +704,7 @@ amdgpu_userq_wait_add_fence(struct drm_amdgpu_userq_wait 
> *wait_info,
>  }
>  
>  static int
> -amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
> +amdgpu_userq_wait_return_fence_info(struct drm_device *dev, struct drm_file 
> *filp,
>                                   struct drm_amdgpu_userq_wait *wait_info,
>                                   u32 *syncobj_handles, u32 *timeline_points,
>                                   u32 *timeline_handles,
> @@ -835,6 +838,8 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
>               goto free_fences;
>       }
>  
> +     trace_amdgpu_userq_job_queue(dev->dev, waitq);
> +
>       for (i = 0, cnt = 0; i < num_fences; i++) {
>               struct amdgpu_userq_fence_driver *fence_drv;
>               struct amdgpu_userq_fence *userq_fence;
> @@ -869,6 +874,8 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
>  
>               amdgpu_userq_fence_driver_get(fence_drv);
>  
> +             trace_amdgpu_userq_job_add_dep(dev->dev, waitq, userq_fence);
> +
>               /* Store drm syncobj's gpu va address and value */
>               fence_info[cnt].va = fence_drv->va;
>               fence_info[cnt].value = fences[i]->seqno;
> @@ -968,7 +975,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void 
> *data,
>                                                  gobj_write,
>                                                  gobj_read);
>       } else {
> -             r = amdgpu_userq_wait_return_fence_info(filp, wait_info,
> +             r = amdgpu_userq_wait_return_fence_info(dev, filp, wait_info,
>                                                       syncobj_handles,
>                                                       timeline_points,
>                                                       timeline_handles,

Reply via email to