Module: Mesa Branch: main Commit: 08f6d14b85b79af06d43ee1897e01720d7c9eb0d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=08f6d14b85b79af06d43ee1897e01720d7c9eb0d
Author: Timur Kristóf <[email protected]> Date: Tue Jan 10 23:39:42 2023 +0100 radv: Make NGG query emission a dirty flag. Don't emit the NGG query user SGPR if its state doesn't change. Based on original work by Mike Blumenkrantz. Signed-off-by: Timur Kristóf <[email protected]> Reviewed-by: Samuel Pitoiset <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18819> --- src/amd/vulkan/radv_cmd_buffer.c | 27 +++++++++++++++++++++------ src/amd/vulkan/radv_meta.c | 6 ++++++ src/amd/vulkan/radv_private.h | 1 + src/amd/vulkan/radv_query.c | 24 ++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 8753b2a0ed3..a8d2dacb804 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5123,7 +5123,6 @@ radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer) VkShaderStageFlags stages = VK_SHADER_STAGE_ALL_GRAPHICS; radv_flush_descriptors(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS); radv_flush_constants(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS); - radv_flush_ngg_query_state(cmd_buffer); radv_flush_force_vrs_state(cmd_buffer); } @@ -5793,6 +5792,10 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi cmd_buffer->state.inherited_pipeline_statistics = pBeginInfo->pInheritanceInfo->pipelineStatistics; + + if (cmd_buffer->state.inherited_pipeline_statistics & + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } if (unlikely(cmd_buffer->device->trace_bo)) @@ -6371,6 +6374,10 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline } } + /* Re-emit NGG query state when SGPR exists but location potentially changed. */ + if (graphics_pipeline->last_vgt_api_stage_locs[AC_UD_NGG_QUERY_STATE].sgpr_idx != -1) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; + /* Re-emit the rasterization samples state because the SGPR idx can be different. */ const struct radv_shader *ps = graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]; if (ps->info.user_sgprs_locs.shader_data[AC_UD_PS_NUM_SAMPLES].sgpr_idx != -1) { @@ -7290,9 +7297,9 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou /* After executing commands from secondary buffers we have to dirty * some states. */ - primary->state.dirty |= - RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_GUARDBAND | - RADV_CMD_DIRTY_DYNAMIC_ALL; + primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | + RADV_CMD_DIRTY_GUARDBAND | RADV_CMD_DIRTY_DYNAMIC_ALL | + RADV_CMD_DIRTY_NGG_QUERY; radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS); radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE); } @@ -8705,6 +8712,11 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS) radv_emit_rbplus_state(cmd_buffer); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_NGG_QUERY) { + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_NGG_QUERY; + radv_flush_ngg_query_state(cmd_buffer); + } + if (cmd_buffer->device->physical_device->use_ngg_culling && cmd_buffer->state.graphics_pipeline->is_ngg) radv_emit_ngg_culling_state(cmd_buffer, info); @@ -10913,9 +10925,12 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) (old_hw_enabled_mask != so->hw_enabled_mask))) radv_emit_streamout_enable(cmd_buffer); - if (cmd_buffer->device->physical_device->use_ngg_streamout && !enable) { + if (cmd_buffer->device->physical_device->use_ngg_streamout) { + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; + /* Re-emit streamout buffers to unbind them. */ - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; + if (!enable) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; } } diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c index 55b2aa1903d..009fb3fe6bf 100644 --- a/src/amd/vulkan/radv_meta.c +++ b/src/amd/vulkan/radv_meta.c @@ -44,6 +44,7 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer state->active_pipeline_gds_queries = cmd_buffer->state.active_pipeline_gds_queries; cmd_buffer->state.active_pipeline_gds_queries = 0; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } /* Occlusion queries. */ @@ -61,12 +62,14 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer if (cmd_buffer->state.active_prims_gen_gds_queries) { state->active_prims_gen_gds_queries = cmd_buffer->state.active_prims_gen_gds_queries; cmd_buffer->state.active_prims_gen_gds_queries = 0; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } /* Transform feedback queries (NGG). */ if (cmd_buffer->state.active_prims_xfb_gds_queries) { state->active_prims_xfb_gds_queries = cmd_buffer->state.active_prims_xfb_gds_queries; cmd_buffer->state.active_prims_xfb_gds_queries = 0; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } } @@ -79,6 +82,7 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS; cmd_buffer->state.active_pipeline_gds_queries = state->active_pipeline_gds_queries; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } /* Occlusion queries. */ @@ -95,11 +99,13 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b /* Primitives generated queries (NGG). */ if (state->active_prims_gen_gds_queries) { cmd_buffer->state.active_prims_gen_gds_queries = state->active_prims_gen_gds_queries; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } /* Transform feedback queries (NGG). */ if (state->active_prims_xfb_gds_queries) { cmd_buffer->state.active_prims_xfb_gds_queries = state->active_prims_xfb_gds_queries; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index ab835ff870d..d2cb92bf27e 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1246,6 +1246,7 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 50, RADV_CMD_DIRTY_GUARDBAND = 1ull << 51, RADV_CMD_DIRTY_RBPLUS = 1ull << 52, + RADV_CMD_DIRTY_NGG_QUERY = 1ull << 53, }; enum radv_cmd_flush_bits { diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index df7d807063b..cd30c23cbeb 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1834,6 +1834,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo /* Record that the command buffer needs GDS. */ cmd_buffer->gds_needed = true; + if (!cmd_buffer->state.active_pipeline_gds_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; + cmd_buffer->state.active_pipeline_gds_queries++; } break; @@ -1848,6 +1851,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_XFB_OFFSET(index), va + 8); radv_emit_write_data_imm(cs, V_370_ME, va + 12, 0x80000000); + if (!cmd_buffer->state.active_prims_xfb_gds_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; + cmd_buffer->state.active_prims_xfb_gds_queries++; } else { emit_sample_streamout(cmd_buffer, va, index); @@ -1862,6 +1868,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo /* Record that the command buffer needs GDS. */ cmd_buffer->gds_needed = true; + if (!cmd_buffer->state.active_prims_gen_gds_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; + cmd_buffer->state.active_prims_gen_gds_queries++; } else { if (!cmd_buffer->state.active_prims_gen_queries) { @@ -1885,6 +1894,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo /* Record that the command buffer needs GDS. */ cmd_buffer->gds_needed = true; + if (!cmd_buffer->state.active_prims_gen_gds_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; + cmd_buffer->state.active_prims_gen_gds_queries++; } } @@ -1957,6 +1969,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET, va); cmd_buffer->state.active_pipeline_gds_queries--; + + if (!cmd_buffer->state.active_pipeline_gds_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } break; } @@ -1971,6 +1986,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, radv_emit_write_data_imm(cs, V_370_ME, va + 28, 0x80000000); cmd_buffer->state.active_prims_xfb_gds_queries--; + + if (!cmd_buffer->state.active_prims_xfb_gds_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } else { emit_sample_streamout(cmd_buffer, va + 16, index); } @@ -1982,6 +2000,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); cmd_buffer->state.active_prims_gen_gds_queries--; + + if (!cmd_buffer->state.active_prims_gen_gds_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } else { if (cmd_buffer->state.active_prims_gen_queries == 1) { bool old_streamout_enabled = radv_is_streamout_enabled(cmd_buffer); @@ -2002,6 +2023,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 36); cmd_buffer->state.active_prims_gen_gds_queries--; + + if (!cmd_buffer->state.active_prims_gen_gds_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; } } break;
