On 5/20/26 08:38, Prike Liang wrote: > From: Pierre-Eric Pelloux-Prayer <[email protected]> > > Add ftrace events for tracking the userq fence emit, signal > and queue state transition.
The queue trace points look good to me, but a clear NAK to the fence trace points; those just duplicate the common trace points in the dma_fence framework. Regards, Christian. > > Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]> > Signed-off-by: Prike Liang <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 129 ++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 21 +++ > .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 13 +- > 3 files changed, 160 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h > index 4ff8a4d7bb8b..32d8c36caaf3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h > @@ -28,6 +28,8 @@ > #include <linux/types.h> > #include <linux/tracepoint.h> > > +#include "amdgpu_userq_fence.h" > + > #undef TRACE_SYSTEM > #define TRACE_SYSTEM amdgpu > #define TRACE_INCLUDE_FILE amdgpu_trace > @@ -659,6 +661,133 @@ DEFINE_EVENT(amdgpu_userq_eviction_fence, > amdgpu_userq_eviction_fence_enable_sig > DEFINE_EVENT(amdgpu_userq_eviction_fence, amdgpu_userq_eviction_fence_signal, > TP_PROTO(u64 context, u64 seqno), > TP_ARGS(context, seqno)); > +TRACE_EVENT(amdgpu_userq_job_run, > + TP_PROTO(struct device *device, struct amdgpu_usermode_queue > *queue, struct amdgpu_userq_fence *fence), > + TP_ARGS(device, queue, fence), > + TP_STRUCT__entry( > + __field(u64, fence_context) > + __field(u64, fence_seqno) > + __string(dev, dev_name(device)) > + __field(u64, doorbell_index) > + __field(u64, client_id) > + __field(u32, queue_type) > + ), > + TP_fast_assign( > + __entry->fence_context = fence->base.context; > + __entry->fence_seqno = fence->base.seqno; > + __assign_str(dev); > + __entry->doorbell_index = queue->doorbell_index; > + __entry->client_id = > queue->userq_mgr->file->client_id; > + __entry->queue_type = queue->queue_type; > + ), > + TP_printk("dev=%s, client_id=%llu, type=%u, 
doorbell=%llu, > fence=%llu:%llu", > + __get_str(dev), __entry->client_id, __entry->queue_type, > __entry->doorbell_index, > + __entry->fence_context, > + __entry->fence_seqno) > +); > + > +TRACE_EVENT(amdgpu_userq_job_done, > + TP_PROTO(struct amdgpu_userq_fence *fence), > + TP_ARGS(fence), > + TP_STRUCT__entry( > + __field(u64, fence_context) > + __field(u64, fence_seqno) > + ), > + TP_fast_assign( > + __entry->fence_context = fence->base.context; > + __entry->fence_seqno = fence->base.seqno; > + ), > + TP_printk("fence=%llu:%llu", > + __entry->fence_context, > + __entry->fence_seqno) > +); > + > +TRACE_EVENT(amdgpu_userq_job_queue, > + TP_PROTO(struct device *device, > + struct amdgpu_usermode_queue *queue), > + TP_ARGS(device, queue), > + TP_STRUCT__entry(__field(u64, context) > + __string(dev, dev_name(device)) > + __field(u64, doorbell_index) > + __field(u64, client_id) > + __field(u32, queue_type) > + ), > + TP_fast_assign(__assign_str(dev); > + __entry->doorbell_index = queue->doorbell_index; > + __entry->queue_type = queue->queue_type; > + __entry->client_id = > queue->userq_mgr->file->client_id; > + __entry->context = queue->fence_drv->context; > + ), > + TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu, > context=%llu", > + __get_str(dev), __entry->client_id, __entry->queue_type, > + __entry->doorbell_index, __entry->context) > +); > + > +TRACE_EVENT(amdgpu_userq_job_add_dep, > + TP_PROTO(struct device *device, struct amdgpu_usermode_queue > *queue, struct amdgpu_userq_fence *dep), > + TP_ARGS(device, queue, dep), > + TP_STRUCT__entry( > + __field(u64, context) > + __field(u64, dep_context) > + __field(u64, dep_seqno) > + __string(dev, dev_name(device)) > + __field(u64, doorbell_index) > + __field(u64, client_id) > + __field(u32, queue_type) > + ), > + TP_fast_assign( > + __assign_str(dev); > + __entry->doorbell_index = queue->doorbell_index; > + __entry->queue_type = queue->queue_type; > + __entry->client_id = > 
queue->userq_mgr->file->client_id; > + __entry->context = queue->fence_drv->context; > + __entry->dep_context = dep->base.context; > + __entry->dep_seqno = dep->base.seqno; > + ), > + TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu, > context=%llu depends on fence=%llu:%llu", > + __get_str(dev), __entry->client_id, __entry->queue_type, > __entry->doorbell_index, __entry->context, > + __entry->dep_context, > + __entry->dep_seqno) > +); > + > +TRACE_EVENT(amdgpu_userq_state_start, > + TP_PROTO(struct amdgpu_usermode_queue *queue), > + TP_ARGS(queue), > + TP_STRUCT__entry( > + __field(u64, doorbell_index) > + __field(u64, client_id) > + __field(u32, queue_type) > + __field(u32, from) > + ), > + TP_fast_assign( > + __entry->doorbell_index = queue->doorbell_index; > + __entry->queue_type = queue->queue_type; > + __entry->client_id = > queue->userq_mgr->file->client_id; > + __entry->from = queue->state; > + ), > + TP_printk("client_id=%llu, type=%u, doorbell=%llu, from=%d", > + __entry->client_id, __entry->queue_type, > __entry->doorbell_index, __entry->from) > +); > + > +TRACE_EVENT(amdgpu_userq_state_changed, > + TP_PROTO(struct amdgpu_usermode_queue *queue, enum > amdgpu_userq_state new_state), > + TP_ARGS(queue, new_state), > + TP_STRUCT__entry( > + __field(u64, doorbell_index) > + __field(u64, client_id) > + __field(u32, queue_type) > + __field(u32, to) > + ), > + TP_fast_assign( > + __entry->doorbell_index = queue->doorbell_index; > + __entry->queue_type = queue->queue_type; > + __entry->client_id = > queue->userq_mgr->file->client_id; > + __entry->to = new_state; > + ), > + TP_printk("client_id=%llu, type=%u, doorbell=%llu, to=%d", > + __entry->client_id, __entry->queue_type, > __entry->doorbell_index, __entry->to) > +); > + > #undef AMDGPU_JOB_GET_TIMELINE_NAME > #endif > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > index e27f9a76f986..60d1186af286 100644 > --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > @@ -329,11 +329,15 @@ static int amdgpu_userq_preempt_helper(struct > amdgpu_usermode_queue *queue) > int r; > > if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { > + trace_amdgpu_userq_state_start(queue); > + > r = userq_funcs->preempt(queue); > if (r) { > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_HUNG); > queue->state = AMDGPU_USERQ_STATE_HUNG; > return r; > } else { > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_PREEMPTED); > queue->state = AMDGPU_USERQ_STATE_PREEMPTED; > } > } > @@ -349,10 +353,14 @@ static int amdgpu_userq_restore_helper(struct > amdgpu_usermode_queue *queue) > int r = 0; > > if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) { > + trace_amdgpu_userq_state_start(queue); > + > r = userq_funcs->restore(queue); > if (r) { > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_HUNG); > queue->state = AMDGPU_USERQ_STATE_HUNG; > } else { > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_MAPPED); > queue->state = AMDGPU_USERQ_STATE_MAPPED; > } > } > @@ -370,12 +378,15 @@ static int amdgpu_userq_unmap_helper(struct > amdgpu_usermode_queue *queue) > > if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || > (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { > + trace_amdgpu_userq_state_start(queue); > > r = userq_funcs->unmap(queue); > if (r) { > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_HUNG); > queue->state = AMDGPU_USERQ_STATE_HUNG; > return r; > } else { > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_UNMAPPED); > queue->state = AMDGPU_USERQ_STATE_UNMAPPED; > } > } > @@ -392,11 +403,15 @@ static int amdgpu_userq_map_helper(struct > amdgpu_usermode_queue *queue) > int r; > > if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { > + trace_amdgpu_userq_state_start(queue); > + > r = userq_funcs->map(queue); > if (r) { > + trace_amdgpu_userq_state_changed(queue, > 
AMDGPU_USERQ_STATE_HUNG); > queue->state = AMDGPU_USERQ_STATE_HUNG; > return r; > } else { > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_MAPPED); > queue->state = AMDGPU_USERQ_STATE_MAPPED; > } > } > @@ -1007,6 +1022,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr > *uq_mgr) > if (!amdgpu_userq_buffer_vas_mapped(queue)) { > drm_file_err(uq_mgr->file, > "trying restore queue without va > mapping\n"); > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_INVALID_VA); > queue->state = AMDGPU_USERQ_STATE_INVALID_VA; > continue; > } > @@ -1502,12 +1518,14 @@ void amdgpu_userq_pre_reset(struct amdgpu_device > *adev) > if (queue->state != AMDGPU_USERQ_STATE_MAPPED) > continue; > > + trace_amdgpu_userq_state_start(queue); > userq_funcs = adev->userq_funcs[queue->queue_type]; > userq_funcs->unmap(queue); > /* just mark all queues as hung at this point. > * if unmap succeeds, we could map again > * in amdgpu_userq_post_reset() if vram is not lost > */ > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_HUNG); > queue->state = AMDGPU_USERQ_STATE_HUNG; > amdgpu_userq_fence_driver_force_completion(queue); > } > @@ -1526,6 +1544,8 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev, > bool vram_lost) > > xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { > if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) { > + trace_amdgpu_userq_state_start(queue); > + > userq_funcs = adev->userq_funcs[queue->queue_type]; > /* Re-map queue */ > r = userq_funcs->map(queue); > @@ -1533,6 +1553,7 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev, > bool vram_lost) > dev_err(adev->dev, "Failed to remap queue > %ld\n", queue_id); > continue; > } > + trace_amdgpu_userq_state_changed(queue, > AMDGPU_USERQ_STATE_MAPPED); > queue->state = AMDGPU_USERQ_STATE_MAPPED; > } > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c > index 008330a0d852..00cc7194321c 100644 
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c > @@ -30,7 +30,7 @@ > #include <drm/drm_syncobj.h> > > #include "amdgpu.h" > -#include "amdgpu_userq_fence.h" > +#include "amdgpu_trace.h" > > #define AMDGPU_USERQ_MAX_HANDLES (1U << 16) > > @@ -169,6 +169,7 @@ amdgpu_userq_fence_driver_process(struct > amdgpu_userq_fence_driver *fence_drv) > fence = &userq_fence->base; > list_del_init(&userq_fence->link); > dma_fence_signal(fence); > + trace_amdgpu_userq_job_done(userq_fence); > /* Drop fence_drv_array outside fence_list_lock > * to avoid the recursion lock. > */ > @@ -528,6 +529,8 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, > void *data, > /* Create the new fence */ > amdgpu_userq_fence_init(queue, fence, wptr); > > + trace_amdgpu_userq_job_run(dev->dev, queue, fence); > + > mutex_unlock(&userq_mgr->userq_mutex); > > /* > @@ -701,7 +704,7 @@ amdgpu_userq_wait_add_fence(struct drm_amdgpu_userq_wait > *wait_info, > } > > static int > -amdgpu_userq_wait_return_fence_info(struct drm_file *filp, > +amdgpu_userq_wait_return_fence_info(struct drm_device *dev, struct drm_file > *filp, > struct drm_amdgpu_userq_wait *wait_info, > u32 *syncobj_handles, u32 *timeline_points, > u32 *timeline_handles, > @@ -835,6 +838,8 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, > goto free_fences; > } > > + trace_amdgpu_userq_job_queue(dev->dev, waitq); > + > for (i = 0, cnt = 0; i < num_fences; i++) { > struct amdgpu_userq_fence_driver *fence_drv; > struct amdgpu_userq_fence *userq_fence; > @@ -869,6 +874,8 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, > > amdgpu_userq_fence_driver_get(fence_drv); > > + trace_amdgpu_userq_job_add_dep(dev->dev, waitq, userq_fence); > + > /* Store drm syncobj's gpu va address and value */ > fence_info[cnt].va = fence_drv->va; > fence_info[cnt].value = fences[i]->seqno; > @@ -968,7 +975,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void > 
*data, > gobj_write, > gobj_read); > } else { > - r = amdgpu_userq_wait_return_fence_info(filp, wait_info, > + r = amdgpu_userq_wait_return_fence_info(dev, filp, wait_info, > syncobj_handles, > timeline_points, > timeline_handles,
