On 5/20/26 08:38, Prike Liang wrote:
> Add ftrace events around user queue creation and destruction to profile
> queue setup and teardown latency.
> 
> Signed-off-by: Prike Liang <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 58 +++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 +++++
>  2 files changed, 69 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index d13e64a69e25..5a01f63d1f32 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -578,6 +578,64 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
>                     __entry->value)
>  );
>  
> +DECLARE_EVENT_CLASS(amdgpu_userq_queue,
> +         TP_PROTO(struct amdgpu_usermode_queue *queue),
> +         TP_ARGS(queue),
> +         TP_STRUCT__entry(
> +                          __field(struct amdgpu_usermode_queue *, queue)

Please don't add object pointers in trace points, those are usually already 
invalid when the printing happens.

If you really want the kernel pointer in the trace then use void* instead.

> +                          __field(u64, doorbell_index)
> +                          __field(int, queue_type)
> +                          __field(int, state)
> +                          __field(u32, xcp_id)
> +                          ),
> +         TP_fast_assign(
> +                        __entry->queue = queue;
> +                        __entry->doorbell_index = queue ? 
> queue->doorbell_index : 0;
> +                        __entry->queue_type = queue ? queue->queue_type : -1;
> +                        __entry->state = queue ? queue->state : -1;
> +                        __entry->xcp_id = queue ? queue->xcp_id : 0;

The queue is a mandatory parameter, please remove all the NULL checks.

> +                        ),
> +         TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u",
> +                   __entry->queue, __entry->doorbell_index,
> +                   __entry->queue_type, __entry->state, __entry->xcp_id)
> +);
> +DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_create_start,
> +          TP_PROTO(struct amdgpu_usermode_queue *queue),
> +          TP_ARGS(queue));
> +DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_destroy_start,
> +          TP_PROTO(struct amdgpu_usermode_queue *queue),
> +          TP_ARGS(queue));
> +DECLARE_EVENT_CLASS(amdgpu_userq_queue_result,
> +         TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> +         TP_ARGS(queue, result),
> +         TP_STRUCT__entry(
> +                          __field(struct amdgpu_usermode_queue *, queue)

Dito.

> +                          __field(u64, doorbell_index)
> +                          __field(int, queue_type)
> +                          __field(int, state)
> +                          __field(u32, xcp_id)
> +                          __field(int, result)
> +                          ),
> +         TP_fast_assign(
> +                        __entry->queue = queue;
> +                        __entry->doorbell_index = queue ? 
> queue->doorbell_index : 0;
> +                        __entry->queue_type = queue ? queue->queue_type : -1;
> +                        __entry->state = queue ? queue->state : -1;
> +                        __entry->xcp_id = queue ? queue->xcp_id : 0;

Dito.

> +                        __entry->result = result;
> +                        ),
> +         TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u, 
> result=%d",
> +                   __entry->queue, __entry->doorbell_index,
> +                   __entry->queue_type, __entry->state,
> +                   __entry->xcp_id, __entry->result)
> +);
> +DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_create_end,
> +          TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> +          TP_ARGS(queue, result));
> +DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_destroy_end,
> +          TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> +          TP_ARGS(queue, result));
> +
>  #undef AMDGPU_JOB_GET_TIMELINE_NAME
>  #endif
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 3bfb9ae2cb3a..e27f9a76f986 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -33,6 +33,7 @@
>  #include "amdgpu_userq.h"
>  #include "amdgpu_hmm.h"
>  #include "amdgpu_userq_fence.h"
> +#include "amdgpu_trace.h"
>  
>  u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
>  {
> @@ -613,6 +614,8 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, 
> struct amdgpu_usermode_que
>  
>       int r = 0;
>  
> +     trace_amdgpu_userq_destroy_start(queue);
> +
>       cancel_delayed_work_sync(&uq_mgr->resume_work);
>  
>       /* Cancel any pending hang detection work and cleanup */
> @@ -621,6 +624,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, 
> struct amdgpu_usermode_que
>       r = amdgpu_bo_reserve(vm->root.bo, false);

>       if (r) {
>               drm_file_err(uq_mgr->file, "Failed to reserve root bo during 
> userqueue destroy\n");
> +             trace_amdgpu_userq_destroy_end(queue, r);
>               return r;
>       }

That error handling should be removed.

>       amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
> @@ -646,6 +650,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, 
> struct amdgpu_usermode_que
>       amdgpu_bo_unpin(queue->wptr_obj.obj);
>       amdgpu_bo_unreserve(queue->wptr_obj.obj);
>       amdgpu_bo_unref(&queue->wptr_obj.obj);
> +     trace_amdgpu_userq_destroy_end(queue, r);
>       kfree(queue);
>  
>       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> @@ -748,6 +753,7 @@ amdgpu_userq_create(struct drm_file *filp, union 
> drm_amdgpu_userq *args)
>       INIT_DELAYED_WORK(&queue->hang_detect_work,
>                         amdgpu_userq_hang_detect_work);
>  
> +     trace_amdgpu_userq_create_start(queue);
>       r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
>       if (r)
>               goto free_queue;
> @@ -807,6 +813,7 @@ amdgpu_userq_create(struct drm_file *filp, union 
> drm_amdgpu_userq *args)
>               r = amdgpu_userq_map_helper(queue);
>               if (r) {
>                       drm_file_err(uq_mgr->file, "Failed to map Queue\n");
> +                     trace_amdgpu_userq_create_end(queue, r);
>                       mutex_unlock(&uq_mgr->userq_mutex);
>                       goto erase_doorbell;
>               }
> @@ -823,11 +830,13 @@ amdgpu_userq_create(struct drm_file *filp, union 
> drm_amdgpu_userq *args)
>                * This drops the last reference which should take care of
>                * all cleanup.
>                */
> +             trace_amdgpu_userq_create_end(queue, r);
>               amdgpu_userq_put(queue);
>               return r;
>       }
>  
>       amdgpu_debugfs_userq_init(filp, queue, qid);
> +     trace_amdgpu_userq_create_end(queue, 0);
>       args->out.queue_id = qid;
>       return 0;
>  
> @@ -847,6 +856,8 @@ amdgpu_userq_create(struct drm_file *filp, union 
> drm_amdgpu_userq *args)
>  free_fence_drv:
>       amdgpu_userq_fence_driver_free(queue);
>  free_queue:
> +     if (queue)

Please remove the NULL check.

Regards,
Christian.

> +             trace_amdgpu_userq_create_end(queue, r);
>       kfree(queue);
>  err_pm_runtime:
>       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);

Reply via email to