Add ftrace events around user queue creation and destruction to profile
queue setup and teardown latency.
Signed-off-by: Prike Liang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 58 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 +++++
2 files changed, 69 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d13e64a69e25..5a01f63d1f32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -578,6 +578,64 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
__entry->value)
);
+DECLARE_EVENT_CLASS(amdgpu_userq_queue,
+ TP_PROTO(struct amdgpu_usermode_queue *queue),
+ TP_ARGS(queue),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_usermode_queue *, queue)
+ __field(u64, doorbell_index)
+ __field(int, queue_type)
+ __field(int, state)
+ __field(u32, xcp_id)
+ ),
+ TP_fast_assign(
+ __entry->queue = queue;
+ __entry->doorbell_index = queue ?
queue->doorbell_index : 0;
+ __entry->queue_type = queue ? queue->queue_type : -1;
+ __entry->state = queue ? queue->state : -1;
+ __entry->xcp_id = queue ? queue->xcp_id : 0;
+ ),
+ TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u",
+ __entry->queue, __entry->doorbell_index,
+ __entry->queue_type, __entry->state, __entry->xcp_id)
+);
+DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_create_start,
+ TP_PROTO(struct amdgpu_usermode_queue *queue),
+ TP_ARGS(queue));
+DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_destroy_start,
+ TP_PROTO(struct amdgpu_usermode_queue *queue),
+ TP_ARGS(queue));
+DECLARE_EVENT_CLASS(amdgpu_userq_queue_result,
+ TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+ TP_ARGS(queue, result),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_usermode_queue *, queue)
+ __field(u64, doorbell_index)
+ __field(int, queue_type)
+ __field(int, state)
+ __field(u32, xcp_id)
+ __field(int, result)
+ ),
+ TP_fast_assign(
+ __entry->queue = queue;
+ __entry->doorbell_index = queue ?
queue->doorbell_index : 0;
+ __entry->queue_type = queue ? queue->queue_type : -1;
+ __entry->state = queue ? queue->state : -1;
+ __entry->xcp_id = queue ? queue->xcp_id : 0;
+ __entry->result = result;
+ ),
+ TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u,
result=%d",
+ __entry->queue, __entry->doorbell_index,
+ __entry->queue_type, __entry->state,
+ __entry->xcp_id, __entry->result)
+);
+DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_create_end,
+ TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+ TP_ARGS(queue, result));
+DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_destroy_end,
+ TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+ TP_ARGS(queue, result));
+
#undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 3077ca4e27a0..50c46d31fbae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -33,6 +33,7 @@
#include "amdgpu_userq.h"
#include "amdgpu_hmm.h"
#include "amdgpu_userq_fence.h"
+#include "amdgpu_trace.h"
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
{
@@ -617,6 +618,8 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_que
int r = 0;
+ trace_amdgpu_userq_destroy_start(queue);
+
cancel_delayed_work_sync(&uq_mgr->resume_work);
/* Cancel any pending hang detection work and cleanup */
@@ -625,6 +628,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_que
r = amdgpu_bo_reserve(vm->root.bo, false);
if (r) {
drm_file_err(uq_mgr->file, "Failed to reserve root bo during
userqueue destroy\n");
+ trace_amdgpu_userq_destroy_end(queue, r);
return r;
}
amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
@@ -650,6 +654,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_que
amdgpu_bo_unpin(queue->wptr_obj.obj);
amdgpu_bo_unreserve(queue->wptr_obj.obj);
amdgpu_bo_unref(&queue->wptr_obj.obj);
+ trace_amdgpu_userq_destroy_end(queue, r);
kfree(queue);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -754,6 +759,7 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)
mutex_init(&queue->fence_drv_lock);
xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+ trace_amdgpu_userq_create_start(queue);
r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
if (r)
goto free_queue;
@@ -809,6 +815,7 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)
* This drops the extra and last reference which should take
* care of all cleanup.
*/
+ trace_amdgpu_userq_create_end(queue, r);
amdgpu_userq_put(queue);
amdgpu_userq_put(queue);
return r;
@@ -826,6 +833,7 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)
r = amdgpu_userq_map_helper(queue);
if (r) {
drm_file_err(uq_mgr->file, "Failed to map Queue\n");
+ trace_amdgpu_userq_create_end(queue, r);
mutex_unlock(&uq_mgr->userq_mutex);
/* Prevent racing with close */
if (xa_erase(&uq_mgr->userq_xa, qid) == queue)
@@ -839,6 +847,7 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)
atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
amdgpu_debugfs_userq_init(filp, queue, qid);
+ trace_amdgpu_userq_create_end(queue, 0);
amdgpu_userq_put(queue);
args->out.queue_id = qid;
return 0;
@@ -853,6 +862,8 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)
free_fence_drv:
amdgpu_userq_fence_driver_free(queue);
free_queue:
+ if (queue)
+ trace_amdgpu_userq_create_end(queue, r);
kfree(queue);
err_pm_runtime:
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);