Le 11/05/2026 à 15:57, Khatri, Sunil a écrit :
If i am not wrong Pierre eric did work on the traces for user queues. I have dropped my patches for the same reason but not sure if the traces patches are merged. Could you check with him once ?

No my patches aren't merged; their scope is to expose something similar to gpu_scheduler events to be able to observe user queues activity.


On 11-05-2026 07:24 pm, Prike Liang wrote:
Add ftrace events around user queue creation and destruction to profile
queue setup and teardown latency.

IMO these events look like something that could be done with the function tracer (optionally using the func-args feature) by tracing amdgpu_userq_destroy / amdgpu_userq_create entry and exit.

Pierre-Eric



Signed-off-by: Prike Liang <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 58 +++++++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 +++++
  2 files changed, 69 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d13e64a69e25..5a01f63d1f32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -578,6 +578,64 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
                __entry->value)
  );
+DECLARE_EVENT_CLASS(amdgpu_userq_queue,
+        TP_PROTO(struct amdgpu_usermode_queue *queue),
+        TP_ARGS(queue),
+        TP_STRUCT__entry(
+                 __field(struct amdgpu_usermode_queue *, queue)
+                 __field(u64, doorbell_index)
+                 __field(int, queue_type)
+                 __field(int, state)
+                 __field(u32, xcp_id)
+                 ),
+        TP_fast_assign(
+               __entry->queue = queue;
+               __entry->doorbell_index = queue ? queue->doorbell_index : 0;
+               __entry->queue_type = queue ? queue->queue_type : -1;
+               __entry->state = queue ? queue->state : -1;
+               __entry->xcp_id = queue ? queue->xcp_id : 0;
+               ),
+        TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u",
+              __entry->queue, __entry->doorbell_index,
+              __entry->queue_type, __entry->state, __entry->xcp_id)
+);
+DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_create_start,
+         TP_PROTO(struct amdgpu_usermode_queue *queue),
+         TP_ARGS(queue));
+DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_destroy_start,
+         TP_PROTO(struct amdgpu_usermode_queue *queue),
+         TP_ARGS(queue));
+DECLARE_EVENT_CLASS(amdgpu_userq_queue_result,
+        TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+        TP_ARGS(queue, result),
+        TP_STRUCT__entry(
+                 __field(struct amdgpu_usermode_queue *, queue)
+                 __field(u64, doorbell_index)
+                 __field(int, queue_type)
+                 __field(int, state)
+                 __field(u32, xcp_id)
+                 __field(int, result)
+                 ),
+        TP_fast_assign(
+               __entry->queue = queue;
+               __entry->doorbell_index = queue ? queue->doorbell_index : 0;
+               __entry->queue_type = queue ? queue->queue_type : -1;
+               __entry->state = queue ? queue->state : -1;
+               __entry->xcp_id = queue ? queue->xcp_id : 0;
+               __entry->result = result;
+               ),
+        TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u, 
result=%d",
+              __entry->queue, __entry->doorbell_index,
+              __entry->queue_type, __entry->state,
+              __entry->xcp_id, __entry->result)
+);
+DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_create_end,
+         TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+         TP_ARGS(queue, result));
+DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_destroy_end,
+         TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+         TP_ARGS(queue, result));
+
  #undef AMDGPU_JOB_GET_TIMELINE_NAME
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 3077ca4e27a0..50c46d31fbae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -33,6 +33,7 @@
  #include "amdgpu_userq.h"
  #include "amdgpu_hmm.h"
  #include "amdgpu_userq_fence.h"
+#include "amdgpu_trace.h"
  u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
  {
@@ -617,6 +618,8 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, 
struct amdgpu_usermode_que
      int r = 0;
+    trace_amdgpu_userq_destroy_start(queue);
+
      cancel_delayed_work_sync(&uq_mgr->resume_work);
      /* Cancel any pending hang detection work and cleanup */
@@ -625,6 +628,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, 
struct amdgpu_usermode_que
      r = amdgpu_bo_reserve(vm->root.bo, false);
      if (r) {
          drm_file_err(uq_mgr->file, "Failed to reserve root bo during userqueue 
destroy\n");
+        trace_amdgpu_userq_destroy_end(queue, r);
          return r;
      }
      amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
@@ -650,6 +654,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, 
struct amdgpu_usermode_que
      amdgpu_bo_unpin(queue->wptr_obj.obj);
      amdgpu_bo_unreserve(queue->wptr_obj.obj);
      amdgpu_bo_unref(&queue->wptr_obj.obj);
+    trace_amdgpu_userq_destroy_end(queue, r);
      kfree(queue);
      pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -754,6 +759,7 @@ amdgpu_userq_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
      mutex_init(&queue->fence_drv_lock);
      xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+    trace_amdgpu_userq_create_start(queue);
      r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
      if (r)
          goto free_queue;
@@ -809,6 +815,7 @@ amdgpu_userq_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
           * This drops the extra and last reference which should take
           * care of all cleanup.
           */
+        trace_amdgpu_userq_create_end(queue, r);
          amdgpu_userq_put(queue);
          amdgpu_userq_put(queue);
          return r;
@@ -826,6 +833,7 @@ amdgpu_userq_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
          r = amdgpu_userq_map_helper(queue);
          if (r) {
              drm_file_err(uq_mgr->file, "Failed to map Queue\n");
+            trace_amdgpu_userq_create_end(queue, r);
              mutex_unlock(&uq_mgr->userq_mutex);
              /* Prevent racing with close */
              if (xa_erase(&uq_mgr->userq_xa, qid) == queue)
@@ -839,6 +847,7 @@ amdgpu_userq_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
      atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
      amdgpu_debugfs_userq_init(filp, queue, qid);
+    trace_amdgpu_userq_create_end(queue, 0);
      amdgpu_userq_put(queue);
      args->out.queue_id = qid;
      return 0;
@@ -853,6 +862,8 @@ amdgpu_userq_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
  free_fence_drv:
      amdgpu_userq_fence_driver_free(queue);
  free_queue:
+    if (queue)
+        trace_amdgpu_userq_create_end(queue, r);
      kfree(queue);
  err_pm_runtime:
      pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);

Reply via email to