Module: Mesa
Branch: main
Commit: 338319741c3b11584188614434660d7dc800a119
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=338319741c3b11584188614434660d7dc800a119

Author: Samuel Pitoiset <[email protected]>
Date:   Wed Oct 25 17:46:35 2023 +0200

radv: add DGC support for mesh shader only

This only implements mesh shaders with DGC because task shaders are
really tricky. I will address them later.

Signed-off-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25890>

---

 src/amd/vulkan/radv_cmd_buffer.c                |  22 +++--
 src/amd/vulkan/radv_device_generated_commands.c | 117 ++++++++++++++++++++++--
 src/amd/vulkan/radv_private.h                   |   1 +
 3 files changed, 127 insertions(+), 13 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 7f33942747a..c17b646bd62 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -9032,7 +9032,8 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info
 }
 
 ALWAYS_INLINE static bool
-radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount)
+radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount,
+                          bool dgc)
 {
    /* For direct draws, this makes sure we don't draw anything.
     * For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100).
@@ -9090,7 +9091,8 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_
    if (pc_stages)
       radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
 
-   radv_describe_draw(cmd_buffer);
+   if (!dgc)
+      radv_describe_draw(cmd_buffer);
    if (likely(!info->indirect)) {
       struct radv_cmd_state *state = &cmd_buffer->state;
       if (unlikely(state->last_num_instances != 1)) {
@@ -9335,7 +9337,7 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y,
    info.count_buffer = NULL;
    info.indirect = NULL;
 
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1))
+   if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1, false))
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
@@ -9368,7 +9370,7 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer
    info.indexed = false;
    info.instance_count = 0;
 
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount))
+   if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount, false))
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
@@ -9402,7 +9404,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
    info.indexed = false;
    info.instance_count = 0;
 
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount))
+   if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount, false))
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
@@ -9453,6 +9455,7 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
    if (compute) {
       radv_dgc_before_dispatch(cmd_buffer);
    } else {
+      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
       struct radv_draw_info info;
 
       info.count = pGeneratedCommandsInfo->sequencesCount;
@@ -9465,8 +9468,13 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
       info.indexed = layout->indexed;
       info.instance_count = 0;
 
-      if (!radv_before_draw(cmd_buffer, &info, 1, true))
-         return;
+      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_MESH)) {
+         if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1, true))
+            return;
+      } else {
+         if (!radv_before_draw(cmd_buffer, &info, 1, true))
+            return;
+      }
    }
 
    uint32_t cmdbuf_size = radv_get_indirect_cmdbuf_size(pGeneratedCommandsInfo);
diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c
index 8a98ad0df72..b656ec5018d 100644
--- a/src/amd/vulkan/radv_device_generated_commands.c
+++ b/src/amd/vulkan/radv_device_generated_commands.c
@@ -89,8 +89,13 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
          *cmd_size += (4 + (pipeline->uses_drawid ? 10 : 5)) * 4;
       }
    } else {
-      /* userdata writes + instance count + non-indexed draw */
-      *cmd_size += (5 + 2 + 3) * 4;
+      if (layout->draw_mesh_tasks) {
+         /* userdata writes + instance count + non-indexed draw */
+         *cmd_size += (6 + 2 + (device->mesh_fast_launch_2 ? 5 : 3)) * 4;
+      } else {
+         /* userdata writes + instance count + non-indexed draw */
+         *cmd_size += (5 + 2 + 3) * 4;
+      }
    }
 
    if (device->sqtt.bo) {
@@ -196,6 +201,7 @@ struct radv_dgc_params {
    uint16_t binds_index_buffer;
    uint16_t vtx_base_sgpr;
    uint32_t max_index_count;
+   uint8_t draw_mesh_tasks;
 
    /* dispatch info */
    uint32_t dispatch_initiator;
@@ -229,6 +235,7 @@ struct radv_dgc_params {
 enum {
    DGC_USES_DRAWID = 1u << 14,
    DGC_USES_BASEINSTANCE = 1u << 15,
+   DGC_USES_GRID_SIZE = DGC_USES_BASEINSTANCE, /* Mesh shader only */
 };
 
 enum {
@@ -326,6 +333,37 @@ dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vtx_bas
    dgc_emit(b, cs, nir_vec(b, values, 5));
 }
 
+static void
+dgc_emit_userdata_mesh(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vtx_base_sgpr, nir_def *x, nir_def *y,
+                       nir_def *z, nir_def *drawid, const struct radv_device *device)
+{
+   vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
+   nir_def *has_grid_size = nir_test_mask(b, vtx_base_sgpr, DGC_USES_GRID_SIZE);
+   nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
+
+   nir_push_if(b, nir_ior(b, has_grid_size, has_drawid));
+   {
+      nir_def *pkt_cnt = nir_imm_int(b, 0);
+      pkt_cnt = nir_bcsel(b, has_grid_size, nir_iadd_imm(b, pkt_cnt, 3), pkt_cnt);
+      pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
+
+      nir_def *values[6] = {
+         nir_pkt3(b, PKT3_SET_SH_REG, pkt_cnt), nir_iand_imm(b, vtx_base_sgpr, 0x3FFF), dgc_get_nop_packet(b, device),
+         dgc_get_nop_packet(b, device),         dgc_get_nop_packet(b, device),          dgc_get_nop_packet(b, device),
+      };
+
+      /* DrawID needs to be first if no GridSize. */
+      values[2] = nir_bcsel(b, has_grid_size, x, drawid);
+      values[3] = nir_bcsel(b, has_grid_size, y, values[3]);
+      values[4] = nir_bcsel(b, has_grid_size, z, values[4]);
+      values[5] = nir_bcsel(b, has_drawid, drawid, values[5]);
+
+      for (uint32_t i = 0; i < ARRAY_SIZE(values); i++)
+         dgc_emit(b, cs, values[i]);
+   }
+   nir_pop_if(b, NULL);
+}
+
 static void
 dgc_emit_sqtt_userdata(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *data)
 {
@@ -447,6 +485,15 @@ dgc_emit_dispatch_direct(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *wg_x, n
    dgc_emit(b, cs, nir_vec(b, values, 5));
 }
 
+static void
+dgc_emit_dispatch_mesh_direct(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *x, nir_def *y, nir_def *z)
+{
+   nir_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DISPATCH_MESH_DIRECT, 3, false)), x, y, z,
+                         nir_imm_int(b, S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX))};
+
+   dgc_emit(b, cs, nir_vec(b, values, 5));
+}
+
 static void
 dgc_emit_grid_size_user_sgpr(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *grid_base_sgpr, nir_def *wg_x,
                              nir_def *wg_y, nir_def *wg_z)
@@ -1111,6 +1158,42 @@ dgc_emit_dispatch(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, ni
    nir_pop_if(b, 0);
 }
 
+/**
+ * Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV.
+ */
+static void
+dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
+                         nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device)
+{
+   nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
+   nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
+
+   nir_def *draw_data = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
+   nir_def *x = nir_channel(b, draw_data, 0);
+   nir_def *y = nir_channel(b, draw_data, 1);
+   nir_def *z = nir_channel(b, draw_data, 2);
+
+   nir_push_if(b, nir_iand(b, nir_ine_imm(b, x, 0), nir_iand(b, nir_ine_imm(b, y, 0), nir_ine_imm(b, z, 0))));
+   {
+      dgc_emit_sqtt_begin_api_marker(b, cs, ApiCmdDrawMeshTasksEXT);
+      dgc_emit_sqtt_marker_event(b, cs, sequence_id, EventCmdDrawMeshTasksEXT);
+
+      dgc_emit_userdata_mesh(b, cs, vtx_base_sgpr, x, y, z, sequence_id, device);
+      dgc_emit_instance_count(b, cs, nir_imm_int(b, 1));
+
+      if (device->mesh_fast_launch_2) {
+         dgc_emit_dispatch_mesh_direct(b, cs, x, y, z);
+      } else {
+         nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z));
+         dgc_emit_draw_index_auto(b, cs, vertex_count);
+      }
+
+      dgc_emit_sqtt_thread_trace_marker(b, cs);
+      dgc_emit_sqtt_end_api_marker(b, cs, ApiCmdDrawMeshTasksEXT);
+   }
+   nir_pop_if(b, NULL);
+}
+
 static nir_shader *
 build_dgc_prepare_shader(struct radv_device *dev)
 {
@@ -1188,8 +1271,18 @@ build_dgc_prepare_shader(struct radv_device *dev)
       {
          nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, draw_indexed), 0));
          {
-            dgc_emit_draw(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset), sequence_id,
-                          dev);
+            nir_def *draw_mesh_tasks = load_param8(&b, draw_mesh_tasks);
+            nir_push_if(&b, nir_ieq_imm(&b, draw_mesh_tasks, 0));
+            {
+               dgc_emit_draw(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset), sequence_id,
+                             dev);
+            }
+            nir_push_else(&b, NULL);
+            {
+               dgc_emit_draw_mesh_tasks(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset),
+                                        sequence_id, dev);
+            }
+            nir_pop_if(&b, NULL);
          }
          nir_push_else(&b, NULL);
          {
@@ -1413,6 +1506,10 @@ radv_CreateIndirectCommandsLayoutNV(VkDevice _device, const VkIndirectCommandsLa
             layout->push_constant_offsets[j] = pCreateInfo->pTokens[i].offset + k * 4;
          }
          break;
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV:
+         layout->draw_mesh_tasks = true;
+         layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
+         break;
       default:
          unreachable("Unhandled token type");
       }
@@ -1576,8 +1673,15 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
 
    if (cmd_buffer->state.graphics_pipeline->uses_drawid)
       vtx_base_sgpr |= DGC_USES_DRAWID;
-   if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
-      vtx_base_sgpr |= DGC_USES_BASEINSTANCE;
+
+   if (layout->draw_mesh_tasks) {
+      struct radv_shader *mesh_shader = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_MESH);
+      if (mesh_shader->info.cs.uses_grid_size)
+         vtx_base_sgpr |= DGC_USES_GRID_SIZE;
+   } else {
+      if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
+         vtx_base_sgpr |= DGC_USES_BASEINSTANCE;
+   }
 
    params->draw_indexed = layout->indexed;
    params->draw_params_offset = layout->draw_params_offset;
@@ -1587,6 +1691,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
    params->index_buffer_offset = layout->index_buffer_offset;
    params->ibo_type_32 = layout->ibo_type_32;
    params->ibo_type_8 = layout->ibo_type_8;
+   params->draw_mesh_tasks = layout->draw_mesh_tasks;
 
    if (layout->bind_vbo_mask) {
       uint32_t mask = vs->info.vs.vb_desc_usage_mask;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 1f26c8df946..72477307b5e 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -3253,6 +3253,7 @@ struct radv_indirect_command_layout {
 
    bool indexed;
    bool binds_index_buffer;
+   bool draw_mesh_tasks;
    uint16_t draw_params_offset;
    uint16_t index_buffer_offset;
 

Reply via email to