Module: Mesa
Branch: main
Commit: 08f6d14b85b79af06d43ee1897e01720d7c9eb0d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=08f6d14b85b79af06d43ee1897e01720d7c9eb0d

Author: Timur Kristóf <[email protected]>
Date:   Tue Jan 10 23:39:42 2023 +0100

radv: Make NGG query emission a dirty flag.

Don't emit the NGG query user SGPR if its state doesn't change.
Based on original work by Mike Blumenkrantz.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18819>

---

 src/amd/vulkan/radv_cmd_buffer.c | 27 +++++++++++++++++++++------
 src/amd/vulkan/radv_meta.c       |  6 ++++++
 src/amd/vulkan/radv_private.h    |  1 +
 src/amd/vulkan/radv_query.c      | 24 ++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8753b2a0ed3..a8d2dacb804 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -5123,7 +5123,6 @@ radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer)
    VkShaderStageFlags stages = VK_SHADER_STAGE_ALL_GRAPHICS;
    radv_flush_descriptors(cmd_buffer, stages, &pipeline->base, 
VK_PIPELINE_BIND_POINT_GRAPHICS);
    radv_flush_constants(cmd_buffer, stages, &pipeline->base, 
VK_PIPELINE_BIND_POINT_GRAPHICS);
-   radv_flush_ngg_query_state(cmd_buffer);
    radv_flush_force_vrs_state(cmd_buffer);
 }
 
@@ -5793,6 +5792,10 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
 
       cmd_buffer->state.inherited_pipeline_statistics =
          pBeginInfo->pInheritanceInfo->pipelineStatistics;
+
+      if (cmd_buffer->state.inherited_pipeline_statistics &
+          VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)
+         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
    }
 
    if (unlikely(cmd_buffer->device->trace_bo))
@@ -6371,6 +6374,10 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
          }
       }
 
+      /* Re-emit NGG query state when SGPR exists but location potentially changed. */
+      if 
(graphics_pipeline->last_vgt_api_stage_locs[AC_UD_NGG_QUERY_STATE].sgpr_idx != 
-1)
+         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
+
       /* Re-emit the rasterization samples state because the SGPR idx can be different. */
       const struct radv_shader *ps = 
graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT];
       if (ps->info.user_sgprs_locs.shader_data[AC_UD_PS_NUM_SAMPLES].sgpr_idx 
!= -1) {
@@ -7290,9 +7297,9 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
    /* After executing commands from secondary buffers we have to dirty
     * some states.
     */
-   primary->state.dirty |=
-      RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | 
RADV_CMD_DIRTY_GUARDBAND |
-      RADV_CMD_DIRTY_DYNAMIC_ALL;
+   primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | 
RADV_CMD_DIRTY_INDEX_BUFFER |
+                           RADV_CMD_DIRTY_GUARDBAND | 
RADV_CMD_DIRTY_DYNAMIC_ALL |
+                           RADV_CMD_DIRTY_NGG_QUERY;
    radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
    radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
 }
@@ -8705,6 +8712,11 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
    if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS)
       radv_emit_rbplus_state(cmd_buffer);
 
+   if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_NGG_QUERY) {
+      cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_NGG_QUERY;
+      radv_flush_ngg_query_state(cmd_buffer);
+   }
+
    if (cmd_buffer->device->physical_device->use_ngg_culling &&
        cmd_buffer->state.graphics_pipeline->is_ngg)
       radv_emit_ngg_culling_state(cmd_buffer, info);
@@ -10913,9 +10925,12 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
         (old_hw_enabled_mask != so->hw_enabled_mask)))
       radv_emit_streamout_enable(cmd_buffer);
 
-   if (cmd_buffer->device->physical_device->use_ngg_streamout && !enable) {
+   if (cmd_buffer->device->physical_device->use_ngg_streamout) {
+      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
+
       /* Re-emit streamout buffers to unbind them. */
-      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
+      if (!enable)
+         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
    }
 }
 
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index 55b2aa1903d..009fb3fe6bf 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -44,6 +44,7 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer
 
       state->active_pipeline_gds_queries = 
cmd_buffer->state.active_pipeline_gds_queries;
       cmd_buffer->state.active_pipeline_gds_queries = 0;
+      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
    }
 
    /* Occlusion queries. */
@@ -61,12 +62,14 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer
    if (cmd_buffer->state.active_prims_gen_gds_queries) {
       state->active_prims_gen_gds_queries = 
cmd_buffer->state.active_prims_gen_gds_queries;
       cmd_buffer->state.active_prims_gen_gds_queries = 0;
+      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
    }
 
    /* Transform feedback queries (NGG). */
    if (cmd_buffer->state.active_prims_xfb_gds_queries) {
       state->active_prims_xfb_gds_queries = 
cmd_buffer->state.active_prims_xfb_gds_queries;
       cmd_buffer->state.active_prims_xfb_gds_queries = 0;
+      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
    }
 }
 
@@ -79,6 +82,7 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b
       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS;
 
       cmd_buffer->state.active_pipeline_gds_queries = 
state->active_pipeline_gds_queries;
+      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
    }
 
    /* Occlusion queries. */
@@ -95,11 +99,13 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b
    /* Primitives generated queries (NGG). */
    if (state->active_prims_gen_gds_queries) {
       cmd_buffer->state.active_prims_gen_gds_queries = 
state->active_prims_gen_gds_queries;
+      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
    }
 
    /* Transform feedback queries (NGG). */
    if (state->active_prims_xfb_gds_queries) {
       cmd_buffer->state.active_prims_xfb_gds_queries = 
state->active_prims_xfb_gds_queries;
+      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
    }
 }
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ab835ff870d..d2cb92bf27e 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1246,6 +1246,7 @@ enum radv_cmd_dirty_bits {
    RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 50,
    RADV_CMD_DIRTY_GUARDBAND = 1ull << 51,
    RADV_CMD_DIRTY_RBPLUS = 1ull << 52,
+   RADV_CMD_DIRTY_NGG_QUERY = 1ull << 53,
 };
 
 enum radv_cmd_flush_bits {
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index df7d807063b..cd30c23cbeb 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1834,6 +1834,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
          /* Record that the command buffer needs GDS. */
          cmd_buffer->gds_needed = true;
 
+         if (!cmd_buffer->state.active_pipeline_gds_queries)
+            cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
+
          cmd_buffer->state.active_pipeline_gds_queries++;
       }
       break;
@@ -1848,6 +1851,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
          gfx10_copy_gds_query(cmd_buffer, 
RADV_NGG_QUERY_PRIM_XFB_OFFSET(index), va + 8);
          radv_emit_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
 
+         if (!cmd_buffer->state.active_prims_xfb_gds_queries)
+            cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
+
          cmd_buffer->state.active_prims_xfb_gds_queries++;
       } else {
          emit_sample_streamout(cmd_buffer, va, index);
@@ -1862,6 +1868,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
          /* Record that the command buffer needs GDS. */
          cmd_buffer->gds_needed = true;
 
+         if (!cmd_buffer->state.active_prims_gen_gds_queries)
+            cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
+
          cmd_buffer->state.active_prims_gen_gds_queries++;
       } else {
          if (!cmd_buffer->state.active_prims_gen_queries) {
@@ -1885,6 +1894,9 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
             /* Record that the command buffer needs GDS. */
             cmd_buffer->gds_needed = true;
 
+            if (!cmd_buffer->state.active_prims_gen_gds_queries)
+               cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
+
             cmd_buffer->state.active_prims_gen_gds_queries++;
          }
       }
@@ -1957,6 +1969,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
          gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET, 
va);
 
          cmd_buffer->state.active_pipeline_gds_queries--;
+
+         if (!cmd_buffer->state.active_pipeline_gds_queries)
+            cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
       }
       break;
    }
@@ -1971,6 +1986,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
          radv_emit_write_data_imm(cs, V_370_ME, va + 28, 0x80000000);
 
          cmd_buffer->state.active_prims_xfb_gds_queries--;
+
+         if (!cmd_buffer->state.active_prims_xfb_gds_queries)
+            cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
       } else {
          emit_sample_streamout(cmd_buffer, va + 16, index);
       }
@@ -1982,6 +2000,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
          radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
 
          cmd_buffer->state.active_prims_gen_gds_queries--;
+
+         if (!cmd_buffer->state.active_prims_gen_gds_queries)
+            cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
       } else {
          if (cmd_buffer->state.active_prims_gen_queries == 1) {
             bool old_streamout_enabled = radv_is_streamout_enabled(cmd_buffer);
@@ -2002,6 +2023,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
             gfx10_copy_gds_query(cmd_buffer, 
RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 36);
 
             cmd_buffer->state.active_prims_gen_gds_queries--;
+
+            if (!cmd_buffer->state.active_prims_gen_gds_queries)
+               cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
          }
       }
       break;

Reply via email to