Module: Mesa
Branch: main
Commit: eef1511437ac6173dfd202b2fc581860d161c183
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=eef1511437ac6173dfd202b2fc581860d161c183

Author: Samuel Pitoiset <[email protected]>
Date:   Tue Aug 30 17:11:57 2022 +0200

radv: implement dynamic patch control points

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18344>

---

 src/amd/vulkan/radv_cmd_buffer.c | 127 +++++++++++++++++++++++++++++++++++----
 src/amd/vulkan/radv_pipeline.c   |  59 ++++++------------
 src/amd/vulkan/radv_private.h    |   7 ++-
 3 files changed, 139 insertions(+), 54 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index de4a9acd7d0..d03265a9783 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -121,6 +121,7 @@ const struct radv_dynamic_state default_dynamic_state = {
    .rasterizer_discard_enable = 0u,
    .logic_op = 0u,
    .color_write_enable = 0u,
+   .patch_control_points = 0,
 };
 
 static void
@@ -252,6 +253,8 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, 
const struct radv_dy
 
    RADV_CMP_COPY(color_write_enable, RADV_DYNAMIC_COLOR_WRITE_ENABLE);
 
+   RADV_CMP_COPY(patch_control_points, RADV_DYNAMIC_PATCH_CONTROL_POINTS);
+
 #undef RADV_CMP_COPY
 
    cmd_buffer->state.dirty |= dest_mask;
@@ -1442,7 +1445,8 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
                                  RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
                                  
RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
                                  RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
-                                 RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
+                                 RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP |
+                                 RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS;
 
    if (!cmd_buffer->state.emitted_graphics_pipeline ||
        cmd_buffer->state.emitted_graphics_pipeline->negative_one_to_one != 
pipeline->negative_one_to_one ||
@@ -1855,6 +1859,61 @@ radv_emit_color_write_enable(struct radv_cmd_buffer 
*cmd_buffer)
                           pipeline->cb_target_mask & d->color_write_enable);
 }
 
+static void
+radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
+{
+   const struct radv_physical_device *pdevice = 
cmd_buffer->device->physical_device;
+   struct radv_graphics_pipeline *pipeline = 
cmd_buffer->state.graphics_pipeline;
+   struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL];
+   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+   unsigned ls_hs_config, base_reg;
+   struct radv_userdata_info *loc;
+
+   ls_hs_config = S_028B58_NUM_PATCHES(cmd_buffer->state.tess_num_patches) |
+                  S_028B58_HS_NUM_INPUT_CP(d->patch_control_points) |
+                  S_028B58_HS_NUM_OUTPUT_CP(tcs->info.tcs.tcs_vertices_out);
+
+   if (pdevice->rad_info.gfx_level >= GFX7) {
+      radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, 
ls_hs_config);
+   } else {
+      radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 
ls_hs_config);
+   }
+
+   if (pdevice->rad_info.gfx_level >= GFX9) {
+      unsigned hs_rsrc2 = tcs->config.rsrc2;
+
+      if (pdevice->rad_info.gfx_level >= GFX10) {
+         hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(cmd_buffer->state.tess_lds_size);
+      } else {
+         hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(cmd_buffer->state.tess_lds_size);
+      }
+
+      radeon_set_sh_reg(cmd_buffer->cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, 
hs_rsrc2);
+   } else {
+      struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX];
+      unsigned ls_rsrc2 = vs->config.rsrc2 | 
S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size);
+
+      radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, 
ls_rsrc2);
+   }
+
+   /* Emit user SGPRs for dynamic patch control points. */
+   loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_TESS_CTRL, 
AC_UD_TCS_OFFCHIP_LAYOUT);
+   if (loc->sgpr_idx == -1)
+      return;
+   assert(loc->num_sgprs == 1);
+
+   base_reg = pipeline->base.user_data_0[MESA_SHADER_TESS_CTRL];
+   radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
+                     (cmd_buffer->state.tess_num_patches << 6) | 
d->patch_control_points);
+
+   loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_TESS_EVAL, 
AC_UD_TES_NUM_PATCHES);
+   assert(loc->sgpr_idx != -1 && loc->num_sgprs == 1);
+
+   base_reg = pipeline->base.user_data_0[MESA_SHADER_TESS_EVAL];
+   radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
+                     cmd_buffer->state.tess_num_patches);
+}
+
 static void
 radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
                          struct radv_color_buffer_info *cb, struct 
radv_image_view *iview,
@@ -3273,6 +3332,9 @@ radv_cmd_buffer_flush_dynamic_state(struct 
radv_cmd_buffer *cmd_buffer, bool pip
    if (states & RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT)
       radv_emit_vertex_input(cmd_buffer, pipeline_is_dirty);
 
+   if (states & RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS)
+      radv_emit_patch_control_points(cmd_buffer);
+
    cmd_buffer->state.dirty &= ~states;
 }
 
@@ -3982,20 +4044,14 @@ si_emit_ia_multi_vgt_param(struct radv_cmd_buffer 
*cmd_buffer, bool instanced_dr
    struct radv_cmd_state *state = &cmd_buffer->state;
    unsigned topology = state->dynamic.primitive_topology;
    bool prim_restart_enable = state->dynamic.primitive_restart_enable;
-   unsigned patch_control_points = 
state->graphics_pipeline->tess_patch_control_points;
+   unsigned patch_control_points = state->dynamic.patch_control_points;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
-   unsigned num_tess_patches = 0;
    unsigned ia_multi_vgt_param;
 
-   if (radv_pipeline_has_stage(state->graphics_pipeline, 
MESA_SHADER_TESS_CTRL)) {
-      struct radv_shader *tcs = 
state->graphics_pipeline->base.shaders[MESA_SHADER_TESS_CTRL];
-      num_tess_patches = tcs->info.num_tess_patches;
-   }
-
    ia_multi_vgt_param =
       si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, 
count_from_stream_output,
                                 draw_vertex_count, topology, 
prim_restart_enable,
-                                patch_control_points, num_tess_patches);
+                                patch_control_points, state->tess_num_patches);
 
    if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
       if (info->gfx_level == GFX9) {
@@ -4023,7 +4079,7 @@ gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer)
       return;
 
    if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
-      primgroup_size = 
pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
+      primgroup_size = state->tess_num_patches;
 
       if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
           radv_get_shader(&pipeline->base, 
MESA_SHADER_TESS_EVAL)->info.uses_prim_id) {
@@ -5109,6 +5165,19 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, 
VkPipelineBindPoint pipeline
          cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
       }
 
+      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TESS_CTRL) &&
+          !(graphics_pipeline->dynamic_states & 
RADV_DYNAMIC_PATCH_CONTROL_POINTS)) {
+         /* Bind the tessellation state from the pipeline when it's not 
dynamic and make sure to
+          * emit it if the number of patches or the LDS size changed.
+          */
+         struct radv_shader *tcs = 
graphics_pipeline->base.shaders[MESA_SHADER_TESS_CTRL];
+
+         cmd_buffer->state.tess_num_patches = tcs->info.num_tess_patches;
+         cmd_buffer->state.tess_lds_size = tcs->info.tcs.num_lds_blocks;
+
+         cmd_buffer->state.dirty |= 
RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS;
+      }
+
       radv_bind_dynamic_state(cmd_buffer, &graphics_pipeline->dynamic_state);
 
       if (graphics_pipeline->esgs_ring_size > 
cmd_buffer->esgs_ring_size_needed)
@@ -5509,7 +5578,12 @@ radv_CmdSetRasterizerDiscardEnable(VkCommandBuffer 
commandBuffer, VkBool32 raste
 VKAPI_ATTR void VKAPI_CALL
 radv_CmdSetPatchControlPointsEXT(VkCommandBuffer commandBuffer, uint32_t 
patchControlPoints)
 {
-   /* not implemented */
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct radv_cmd_state *state = &cmd_buffer->state;
+
+   state->dynamic.patch_control_points = patchControlPoints;
+
+   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS;
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -7195,6 +7269,37 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer 
*cmd_buffer, const struct r
       }
    }
 
+   /* Pre-compute some tessellation info that depend on the number of patch 
control points when the
+    * bound pipeline declared this state as dynamic.
+    */
+   if (cmd_buffer->state.graphics_pipeline->dynamic_states & 
RADV_DYNAMIC_PATCH_CONTROL_POINTS) {
+      uint64_t dynamic_states =
+         cmd_buffer->state.dirty & 
cmd_buffer->state.emitted_graphics_pipeline->needed_dynamic_state;
+
+      if (dynamic_states & RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS) {
+         const struct radv_physical_device *pdevice = device->physical_device;
+         const struct radv_graphics_pipeline *pipeline = 
cmd_buffer->state.graphics_pipeline;
+         const struct radv_shader *tcs = 
pipeline->base.shaders[MESA_SHADER_TESS_CTRL];
+         const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+
+         /* Compute the number of patches and emit the context register. */
+         cmd_buffer->state.tess_num_patches =
+            get_tcs_num_patches(d->patch_control_points, 
tcs->info.tcs.tcs_vertices_out,
+                                tcs->info.tcs.num_linked_inputs, 
tcs->info.tcs.num_linked_outputs,
+                                tcs->info.tcs.num_linked_patch_outputs,
+                                pdevice->hs.tess_offchip_block_dw_size, 
pdevice->rad_info.gfx_level,
+                                pdevice->rad_info.family);
+
+         /* Compute the LDS size and emit the shader register. */
+         cmd_buffer->state.tess_lds_size =
+            calculate_tess_lds_size(pdevice->rad_info.gfx_level, 
d->patch_control_points,
+                                    tcs->info.tcs.tcs_vertices_out, 
tcs->info.tcs.num_linked_inputs,
+                                    cmd_buffer->state.tess_num_patches,
+                                    tcs->info.tcs.num_linked_outputs,
+                                    tcs->info.tcs.num_linked_patch_outputs);
+      }
+   }
+
    radv_cmd_buffer_flush_dynamic_state(cmd_buffer, pipeline_is_dirty);
 
    radv_emit_draw_registers(cmd_buffer, info);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index bf8ae5a710a..5ae0be499a2 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1369,9 +1369,14 @@ radv_pipeline_needed_dynamic_state(const struct 
radv_graphics_pipeline *pipeline
 
    /* Disable dynamic states that are useless when rasterization is disabled. 
*/
    if (!raster_enabled) {
-      return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | 
RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
-             RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | 
RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
-             RADV_DYNAMIC_VERTEX_INPUT;
+      states = RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | 
RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
+               RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | 
RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
+               RADV_DYNAMIC_VERTEX_INPUT;
+
+      if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
+         states |= RADV_DYNAMIC_PATCH_CONTROL_POINTS;
+
+      return states;
    }
 
    if (!state->rs->depth_bias.enable &&
@@ -1405,6 +1410,9 @@ radv_pipeline_needed_dynamic_state(const struct 
radv_graphics_pipeline *pipeline
    if (!has_color_att)
       states &= ~RADV_DYNAMIC_COLOR_WRITE_ENABLE;
 
+   if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT))
+      states &= ~RADV_DYNAMIC_PATCH_CONTROL_POINTS;
+
    return states;
 }
 
@@ -1805,6 +1813,10 @@ radv_pipeline_init_dynamic_state(struct 
radv_graphics_pipeline *pipeline,
       }
    }
 
+   if (states & RADV_DYNAMIC_PATCH_CONTROL_POINTS) {
+      dynamic->patch_control_points = state->ts->patch_control_points;
+   }
+
    pipeline->dynamic_state.mask = states;
 }
 
@@ -4807,20 +4819,11 @@ static void
 radv_pipeline_emit_hw_ls(struct radeon_cmdbuf *cs, const struct 
radv_graphics_pipeline *pipeline,
                          const struct radv_shader *shader)
 {
-   const struct radv_physical_device *pdevice = 
pipeline->base.device->physical_device;
-   unsigned num_lds_blocks = 
pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
    uint64_t va = radv_shader_get_va(shader);
-   uint32_t rsrc2 = shader->config.rsrc2;
 
    radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
 
-   rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
-   if (pdevice->rad_info.gfx_level == GFX7 && pdevice->rad_info.family != 
CHIP_HAWAII)
-      radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
-
-   radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
-   radeon_emit(cs, shader->config.rsrc1);
-   radeon_emit(cs, rsrc2);
+   radeon_set_sh_reg(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 
shader->config.rsrc1);
 }
 
 static void
@@ -5010,21 +5013,13 @@ radv_pipeline_emit_hw_hs(struct radeon_cmdbuf *cs, 
const struct radv_graphics_pi
    uint64_t va = radv_shader_get_va(shader);
 
    if (pdevice->rad_info.gfx_level >= GFX9) {
-      uint32_t rsrc2 = shader->config.rsrc2;
-
       if (pdevice->rad_info.gfx_level >= GFX10) {
-         rsrc2 |= S_00B42C_LDS_SIZE_GFX10(shader->info.tcs.num_lds_blocks);
-
          radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
       } else {
-         rsrc2 |= S_00B42C_LDS_SIZE_GFX9(shader->info.tcs.num_lds_blocks);
-
          radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
       }
 
-      radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
-      radeon_emit(cs, shader->config.rsrc1);
-      radeon_emit(cs, rsrc2);
+      radeon_set_sh_reg(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 
shader->config.rsrc1);
    } else {
       radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
       radeon_emit(cs, va >> 8);
@@ -5092,22 +5087,6 @@ radv_pipeline_emit_tess_state(struct radeon_cmdbuf 
*ctx_cs,
    const struct radv_physical_device *pdevice = 
pipeline->base.device->physical_device;
    struct radv_shader *tes = radv_get_shader(&pipeline->base, 
MESA_SHADER_TESS_EVAL);
    unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
-   unsigned num_tcs_input_cp, num_tcs_output_cp, num_patches;
-   unsigned ls_hs_config;
-
-   num_tcs_input_cp = state->ts->patch_control_points;
-   num_tcs_output_cp =
-      
pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; // 
TCS VERTICES OUT
-   num_patches = 
pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
-
-   ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | 
S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
-                  S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
-
-   if (pdevice->rad_info.gfx_level >= GFX7) {
-      radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, 
ls_hs_config);
-   } else {
-      radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
-   }
 
    switch (tes->info.tes._primitive_mode) {
    case TESS_PRIMITIVE_TRIANGLES:
@@ -6087,10 +6066,6 @@ radv_graphics_pipeline_init(struct 
radv_graphics_pipeline *pipeline, struct radv
       radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info);
    }
 
-   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
-      pipeline->tess_patch_control_points = state.ts->patch_control_points;
-   }
-
    if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
       radv_pipeline_init_vertex_input_state(pipeline, &state);
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3e536488de0..e1869a1dc71 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1328,6 +1328,8 @@ struct radv_dynamic_state {
    unsigned logic_op;
 
    uint32_t color_write_enable;
+
+   uint32_t patch_control_points;
 };
 
 extern const struct radv_dynamic_state default_dynamic_state;
@@ -1553,6 +1555,10 @@ struct radv_cmd_state {
 
    /* Whether this commandbuffer uses performance counters. */
    bool uses_perf_counters;
+
+   /* Tessellation info when patch control points is dynamic. */
+   unsigned tess_num_patches;
+   unsigned tess_lds_size;
 };
 
 struct radv_cmd_buffer_upload {
@@ -2002,7 +2008,6 @@ struct radv_graphics_pipeline {
    struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
    uint8_t vtx_emit_num;
    uint64_t needed_dynamic_state;
-   unsigned tess_patch_control_points;
    unsigned pa_su_sc_mode_cntl;
    unsigned pa_cl_clip_cntl;
    unsigned cb_color_control;

Reply via email to