AMD General

As far as I can see, there is no such implementation on the drm-next branch yet.
I came up with the idea of tracking the userq lifetime quite a while ago and
also raised it in the dev discussion group. This would help not only to profile
userq latency but also to troubleshoot userq sequencing issues.
Hi @Pelloux-Prayer, Pierre-Eric, could you please share the status of your userq
ftrace work, or let us know whether this series is supplementary to what you
already have? Ideally, we should land such an implementation to properly track
and profile the userq flow.


Regards,
      Prike

> -----Original Message-----
> From: Khatri, Sunil <[email protected]>
> Sent: Monday, May 11, 2026 9:57 PM
> To: Liang, Prike <[email protected]>; [email protected]
> Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
> <[email protected]>; Pelloux-Prayer, Pierre-Eric <Pierre-eric.Pelloux-
> [email protected]>
> Subject: Re: [PATCH 1/4] drm/amdgpu: add userq create and destroy tracepoints
>
> If I am not wrong, Pierre-Eric did work on the traces for user queues. I have
> dropped my patches for the same reason, but I am not sure if the trace patches
> are merged. Could you check with him once?
>
> On 11-05-2026 07:24 pm, Prike Liang wrote:
> > Add ftrace events around user queue creation and destruction to
> > profile queue setup and teardown latency.
> >
> > Signed-off-by: Prike Liang <[email protected]>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 58
> +++++++++++++++++++++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 +++++
> >   2 files changed, 69 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> > index d13e64a69e25..5a01f63d1f32 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> > @@ -578,6 +578,64 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
> >                   __entry->value)
> >   );
> >
> > +DECLARE_EVENT_CLASS(amdgpu_userq_queue,
> > +       TP_PROTO(struct amdgpu_usermode_queue *queue),
> > +       TP_ARGS(queue),
> > +       TP_STRUCT__entry(
> > +                        __field(struct amdgpu_usermode_queue *, queue)
> > +                        __field(u64, doorbell_index)
> > +                        __field(int, queue_type)
> > +                        __field(int, state)
> > +                        __field(u32, xcp_id)
> > +                        ),
> > +       TP_fast_assign(
> > +                      __entry->queue = queue;
> > +                      __entry->doorbell_index = queue ? 
> > queue->doorbell_index :
> 0;
> > +                      __entry->queue_type = queue ? queue->queue_type : -1;
> > +                      __entry->state = queue ? queue->state : -1;
> > +                      __entry->xcp_id = queue ? queue->xcp_id : 0;
> > +                      ),
> > +       TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u",
> > +                 __entry->queue, __entry->doorbell_index,
> > +                 __entry->queue_type, __entry->state, __entry->xcp_id) );
> > +DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_create_start,
> > +        TP_PROTO(struct amdgpu_usermode_queue *queue),
> > +        TP_ARGS(queue));
> > +DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_destroy_start,
> > +        TP_PROTO(struct amdgpu_usermode_queue *queue),
> > +        TP_ARGS(queue));
> > +DECLARE_EVENT_CLASS(amdgpu_userq_queue_result,
> > +       TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> > +       TP_ARGS(queue, result),
> > +       TP_STRUCT__entry(
> > +                        __field(struct amdgpu_usermode_queue *, queue)
> > +                        __field(u64, doorbell_index)
> > +                        __field(int, queue_type)
> > +                        __field(int, state)
> > +                        __field(u32, xcp_id)
> > +                        __field(int, result)
> > +                        ),
> > +       TP_fast_assign(
> > +                      __entry->queue = queue;
> > +                      __entry->doorbell_index = queue ? 
> > queue->doorbell_index :
> 0;
> > +                      __entry->queue_type = queue ? queue->queue_type : -1;
> > +                      __entry->state = queue ? queue->state : -1;
> > +                      __entry->xcp_id = queue ? queue->xcp_id : 0;
> > +                      __entry->result = result;
> > +                      ),
> > +       TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u,
> result=%d",
> > +                 __entry->queue, __entry->doorbell_index,
> > +                 __entry->queue_type, __entry->state,
> > +                 __entry->xcp_id, __entry->result) );
> > +DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_create_end,
> > +        TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> > +        TP_ARGS(queue, result));
> > +DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_destroy_end,
> > +        TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> > +        TP_ARGS(queue, result));
> > +
> >   #undef AMDGPU_JOB_GET_TIMELINE_NAME
> >   #endif
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > index 3077ca4e27a0..50c46d31fbae 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > @@ -33,6 +33,7 @@
> >   #include "amdgpu_userq.h"
> >   #include "amdgpu_hmm.h"
> >   #include "amdgpu_userq_fence.h"
> > +#include "amdgpu_trace.h"
> >
> >   u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
> >   {
> > @@ -617,6 +618,8 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr
> > *uq_mgr, struct amdgpu_usermode_que
> >
> >     int r = 0;
> >
> > +   trace_amdgpu_userq_destroy_start(queue);
> > +
> >     cancel_delayed_work_sync(&uq_mgr->resume_work);
> >
> >     /* Cancel any pending hang detection work and cleanup */ @@ -625,6
> > +628,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct
> amdgpu_usermode_que
> >     r = amdgpu_bo_reserve(vm->root.bo, false);
> >     if (r) {
> >             drm_file_err(uq_mgr->file, "Failed to reserve root bo during
> > userqueue destroy\n");
> > +           trace_amdgpu_userq_destroy_end(queue, r);
> >             return r;
> >     }
> >     amdgpu_userq_buffer_vas_list_cleanup(adev, queue); @@ -650,6 +654,7
> > @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct
> amdgpu_usermode_que
> >     amdgpu_bo_unpin(queue->wptr_obj.obj);
> >     amdgpu_bo_unreserve(queue->wptr_obj.obj);
> >     amdgpu_bo_unref(&queue->wptr_obj.obj);
> > +   trace_amdgpu_userq_destroy_end(queue, r);
> >     kfree(queue);
> >
> >     pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > @@ -754,6 +759,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> > drm_amdgpu_userq *args)
> >
> >     mutex_init(&queue->fence_drv_lock);
> >     xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
> > +   trace_amdgpu_userq_create_start(queue);
> >     r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
> >     if (r)
> >             goto free_queue;
> > @@ -809,6 +815,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> >              * This drops the extra and last reference which should take
> >              * care of all cleanup.
> >              */
> > +           trace_amdgpu_userq_create_end(queue, r);
> >             amdgpu_userq_put(queue);
> >             amdgpu_userq_put(queue);
> >             return r;
> > @@ -826,6 +833,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> >             r = amdgpu_userq_map_helper(queue);
> >             if (r) {
> >                     drm_file_err(uq_mgr->file, "Failed to map Queue\n");
> > +                   trace_amdgpu_userq_create_end(queue, r);
> >                     mutex_unlock(&uq_mgr->userq_mutex);
> >                     /* Prevent racing with close */
> >                     if (xa_erase(&uq_mgr->userq_xa, qid) == queue) @@ -839,6
> +847,7
> > @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> >     atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
> >
> >     amdgpu_debugfs_userq_init(filp, queue, qid);
> > +   trace_amdgpu_userq_create_end(queue, 0);
> >     amdgpu_userq_put(queue);
> >     args->out.queue_id = qid;
> >     return 0;
> > @@ -853,6 +862,8 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> >   free_fence_drv:
> >     amdgpu_userq_fence_driver_free(queue);
> >   free_queue:
> > +   if (queue)
> > +           trace_amdgpu_userq_create_end(queue, r);
> >     kfree(queue);
> >   err_pm_runtime:
> >     pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);

Reply via email to