Module: Mesa Branch: main Commit: 020baed66e4ffe4595de2236d32562d74a6d66b0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=020baed66e4ffe4595de2236d32562d74a6d66b0
Author: Emma Anholt <[email protected]> Date: Wed Mar 8 12:30:24 2023 -0800 tu/perfetto: Use tu_CmdBeginDebugUtilsLabelEXT as a stage event in perfetto. This lets zink mark points of interest (particularly its barriers and blits) with some useful data, for presenting in perfetto traces. Closes: #8487 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22278> --- src/freedreno/vulkan/tu_device.cc | 52 +++++++++++++++++++++ src/freedreno/vulkan/tu_perfetto.cc | 84 ++++++++++++++++++++++++++++++++-- src/freedreno/vulkan/tu_perfetto.h | 4 ++ src/freedreno/vulkan/tu_tracepoints.py | 9 ++++ src/util/perf/u_perfetto_renderpass.h | 3 ++ 5 files changed, 149 insertions(+), 3 deletions(-) diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 4d503e57073..7497a182bf0 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -3459,3 +3459,55 @@ tu_debug_bos_print_stats(struct tu_device *dev) mtx_unlock(&dev->bo_mutex); } + +void +tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer, + const VkDebugUtilsLabelEXT *pLabelInfo) +{ + VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer); + + vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo); + + /* Note that the spec says: + * + * "An application may open a debug label region in one command buffer and + * close it in another, or otherwise split debug label regions across + * multiple command buffers or multiple queue submissions. When viewed + * from the linear series of submissions to a single queue, the calls to + * vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be + * matched and balanced." + * + * But if you're beginning labeling during a renderpass and ending outside + * it, or vice versa, these trace ranges in perfetto will be unbalanced. I + * expect that u_trace and perfetto will do something like take just one of + * the begins/ends, or drop the event entirely, but not crash. 
Similarly, + * I think we'll have problems if the tracepoints are split across cmd + * buffers. Still, getting the simple case of cmd buffer annotation into + * perfetto should prove useful. + */ + const char *label = pLabelInfo->pLabelName; + if (cmd_buffer->state.pass) { + trace_start_cmd_buffer_annotation_rp( + &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label); + } else { + trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs, + strlen(label), label); + } +} + +void +tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer) +{ + VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer); + + if (cmd_buffer->vk.labels.size > 0) { + if (cmd_buffer->state.pass) { + trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace, + &cmd_buffer->draw_cs); + } else { + trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs); + } + } + + vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer); +} diff --git a/src/freedreno/vulkan/tu_perfetto.cc b/src/freedreno/vulkan/tu_perfetto.cc index d54e39a6da1..40e2f0e8f80 100644 --- a/src/freedreno/vulkan/tu_perfetto.cc +++ b/src/freedreno/vulkan/tu_perfetto.cc @@ -40,7 +40,9 @@ enum { */ enum tu_stage_id { CMD_BUFFER_STAGE_ID, + CMD_BUFFER_ANNOTATION_STAGE_ID, RENDER_PASS_STAGE_ID, + CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID, BINNING_STAGE_ID, GMEM_STAGE_ID, BYPASS_STAGE_ID, @@ -66,7 +68,9 @@ static const struct { const char *desc; } stages[] = { [CMD_BUFFER_STAGE_ID] = { "Command Buffer" }, + [CMD_BUFFER_ANNOTATION_STAGE_ID] = { "Annotation", "Command Buffer Annotation" }, [RENDER_PASS_STAGE_ID] = { "Render Pass" }, + [CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID] = { "Annotation", "Render Pass Command Buffer Annotation" }, [BINNING_STAGE_ID] = { "Binning", "Perform Visibility pass and determine target bins" }, [GMEM_STAGE_ID] = { "GMEM", "Rendering to GMEM" }, [BYPASS_STAGE_ID] = { "Bypass", "Rendering to system memory" }, @@ -134,6 +138,9 @@ send_descriptors(TuRenderpassDataSource::TraceContext 
&ctx, uint64_t ts_ns) auto packet = ctx.NewTracePacket(); + /* This must be set before interned data is sent. */ + packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED); + packet->set_timestamp(0); auto event = packet->set_gpu_render_stage_event(); @@ -192,6 +199,7 @@ static void stage_start(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id, + const char *app_event, const void *payload = nullptr, size_t payload_size = 0, trace_payload_as_extra_func payload_as_extra = nullptr) @@ -214,10 +222,18 @@ stage_start(struct tu_device *dev, *stage = (struct tu_perfetto_stage) { .stage_id = stage_id, + .stage_iid = 0, .start_ts = ts_ns, .payload = payload, .start_payload_function = (void *) payload_as_extra, }; + + if (app_event) { + TuRenderpassDataSource::Trace([=](auto tctx) { + stage->stage_iid = + tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event); + }); + } } static void @@ -265,8 +281,11 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id, event->set_event_id(0); // ??? event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID); event->set_duration(ts_ns - stage->start_ts); - event->set_stage_id(stage->stage_id); - event->set_context((uintptr_t)dev); + if (stage->stage_iid) + event->set_stage_iid(stage->stage_iid); + else + event->set_stage_id(stage->stage_id); + event->set_context((uintptr_t) dev); event->set_submission_id(submission_id); if (stage->payload) { @@ -385,6 +404,13 @@ tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id) /* * Trace callbacks, called from u_trace once the timestamps from GPU have been * collected. + * + * The default "extra" funcs are code-generated into tu_tracepoints_perfetto.h + * and just take the tracepoint's args and add them as name/value pairs in the + * perfetto events. This file can usually just map a tu_perfetto_* to + * stage_start/end with a call to that codegenned "extra" func. 
But you can + * also provide your own entrypoint and extra funcs if you want to change that + * mapping. */ #define CREATE_EVENT_CALLBACK(event_name, stage_id) \ @@ -393,7 +419,7 @@ tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id) const struct trace_start_##event_name *payload) \ { \ stage_start( \ - dev, ts_ns, stage_id, payload, sizeof(*payload), \ + dev, ts_ns, stage_id, NULL, payload, sizeof(*payload), \ (trace_payload_as_extra_func) &trace_payload_as_extra_start_##event_name); \ } \ \ @@ -420,6 +446,58 @@ CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID) CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID) CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID) +void +tu_perfetto_start_cmd_buffer_annotation( + struct tu_device *dev, + uint64_t ts_ns, + const void *flush_data, + const struct trace_start_cmd_buffer_annotation *payload) +{ + /* The label is passed as the app_event; no extra func is necessary. */ + stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, payload, + sizeof(*payload), NULL); +} + +void +tu_perfetto_end_cmd_buffer_annotation( + struct tu_device *dev, + uint64_t ts_ns, + const void *flush_data, + const struct trace_end_cmd_buffer_annotation *payload) +{ + /* No app_event here: the payload string was already passed to stage_start() + * by the matching start callback, where it names the event block.
+ */ + stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, flush_data, + payload, NULL); +} + +void +tu_perfetto_start_cmd_buffer_annotation_rp( + struct tu_device *dev, + uint64_t ts_ns, + const void *flush_data, + const struct trace_start_cmd_buffer_annotation_rp *payload) +{ + /* The label is passed as the app_event; no extra func is necessary. */ + stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID, + payload->str, payload, sizeof(*payload), NULL); +} + +void +tu_perfetto_end_cmd_buffer_annotation_rp( + struct tu_device *dev, + uint64_t ts_ns, + const void *flush_data, + const struct trace_end_cmd_buffer_annotation_rp *payload) +{ + /* No app_event here: the payload string was already passed to stage_start() + * by the matching start callback, where it names the event block. + */ + stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID, + flush_data, payload, NULL); +} + #ifdef __cplusplus } #endif diff --git a/src/freedreno/vulkan/tu_perfetto.h b/src/freedreno/vulkan/tu_perfetto.h index 12f2bce1678..92a56c6355d 100644 --- a/src/freedreno/vulkan/tu_perfetto.h +++ b/src/freedreno/vulkan/tu_perfetto.h @@ -22,6 +22,10 @@ struct tu_u_trace_submission_data; struct tu_perfetto_stage { int stage_id; + /* dynamically allocated stage iid, for app_events. 0 if stage_id should be + * used instead.
+ */ + uint64_t stage_iid; uint64_t start_ts; const void* payload; void* start_payload_function; diff --git a/src/freedreno/vulkan/tu_tracepoints.py b/src/freedreno/vulkan/tu_tracepoints.py index e3d5da5b766..5af5ce69159 100644 --- a/src/freedreno/vulkan/tu_tracepoints.py +++ b/src/freedreno/vulkan/tu_tracepoints.py @@ -123,6 +123,15 @@ begin_end_tp('compute', Arg(type='uint16_t', var='num_groups_y', c_format='%u'), Arg(type='uint16_t', var='num_groups_z', c_format='%u')]) + +# Annotations for Cmd(Begin|End)DebugUtilsLabelEXT +for suffix in ["", "_rp"]: + begin_end_tp('cmd_buffer_annotation' + suffix, + args=[ArgStruct(type='unsigned', var='len'), + ArgStruct(type='const char *', var='str'),], + tp_struct=[Arg(type='uint8_t', name='dummy', var='0', c_format='%hhu'), + Arg(type='char', name='str', var='str', c_format='%s', length_arg='len + 1', copy_func='strncpy'),]) + utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param='struct tu_device *dev', diff --git a/src/util/perf/u_perfetto_renderpass.h b/src/util/perf/u_perfetto_renderpass.h index a128cf6fd28..13aad3481eb 100644 --- a/src/util/perf/u_perfetto_renderpass.h +++ b/src/util/perf/u_perfetto_renderpass.h @@ -114,6 +114,9 @@ class MesaRenderpassDataSource * event in the UI, rather than needing to click into the event to find the * name in the metadata. Intended for use with * vkCmdBeginDebugUtilsLabelEXT() and glPushDebugGroup(). + * + * Note that SEQ_INCREMENTAL_STATE_CLEARED must have been set in the + * sequence before this is called. */ uint64_t debug_marker_stage(TraceContext &ctx, const char *name) {
