Module: Mesa
Branch: main
Commit: 6d4f43f0d6f58edb5bf236ddf1ce6f798afa851b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d4f43f0d6f58edb5bf236ddf1ce6f798afa851b

Author: Rohan Garg <rohan.g...@intel.com>
Date:   Thu Jun 23 09:59:37 2022 +0200

anv: Emit EXECUTE_INDIRECT_DRAW when available

On newer platforms (Arrow Lake and above) we can issue an
EXECUTE_INDIRECT_DRAW that allows us to:
  * Skip issuing MI load/store instructions for indirect parameters
  * Skip doing the indirect draw unroll on the CPU side when the
    appropriate stride is passed

Signed-off-by: Rohan Garg <rohan.g...@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26421>

---

 src/intel/vulkan/genX_cmd_buffer.c | 69 +++++++++++++++++++++++++++++++++-----
 1 file changed, 60 insertions(+), 9 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 89aeb3a1d1a..504f5acb590 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -4871,6 +4871,26 @@ load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer,
 #endif
 }
 
+static const bool
+execute_indirect_draw_supported(struct anv_cmd_buffer *cmd_buffer)
+{
+#if GFX_VERx10 >= 125
+   const struct intel_device_info *devinfo = cmd_buffer->device->info;
+   struct anv_graphics_pipeline *pipeline =
+      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+   const bool is_multiview = pipeline->instance_multiplier > 1;
+
+   return (devinfo->has_indirect_unroll &&
+           !is_multiview &&
+           !vs_prog_data->uses_firstvertex &&
+           !vs_prog_data->uses_baseinstance &&
+           !vs_prog_data->uses_drawid);
+#else
+   return false;
+#endif
+}
+
 static void
 emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_address indirect_data_addr,
@@ -4883,6 +4903,12 @@ emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer,
       anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
    const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
 #endif
+   UNUSED const struct intel_device_info *devinfo = cmd_buffer->device->info;
+   UNUSED const bool aligned_stride =
+      (indirect_data_stride == 0 ||
+       indirect_data_stride == sizeof(VkDrawIndirectCommand));
+   UNUSED const bool execute_indirect_supported =
+      execute_indirect_draw_supported(cmd_buffer);
 
    genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
 
@@ -4918,8 +4944,6 @@ emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer,
        */
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
-      load_indirect_parameters(cmd_buffer, draw, indexed, i);
-
       /* Wa_1306463417, Wa_16011107343 - Send HS state for every primitive,
        * first one was handled by cmd_buffer_flush_gfx_state.
        */
@@ -4927,17 +4951,44 @@ emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer,
          genX(emit_hs)(cmd_buffer);
       genX(emit_ds)(cmd_buffer);
 
-      genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
-      anv_batch_emit(&cmd_buffer->batch, _3DPRIMITIVE_DIRECT, prim) {
+      if (execute_indirect_supported) {
 #if GFX_VERx10 >= 125
-         prim.TBIMREnable = cmd_buffer->state.gfx.dyn_state.use_tbimr;
+         genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
+         anv_batch_emit(&cmd_buffer->batch, GENX(EXECUTE_INDIRECT_DRAW), ind) {
+            ind.ArgumentFormat             = DRAW;
+            ind.TBIMREnabled               = cmd_buffer->state.gfx.dyn_state.use_tbimr;
+            ind.PredicateEnable            =
+               cmd_buffer->state.conditional_render_enabled;
+            ind.MaxCount                   = aligned_stride ? draw_count : 1;
+            ind.ArgumentBufferStartAddress = draw;
+            ind.MOCS                       =
+               anv_mocs(cmd_buffer->device, draw.bo, 0);
+         }
+         /* If all the indirect structures are aligned, then we can let the HW
+          * do the unrolling and we only need one instruction. Otherwise we
+          * need to emit one instruction per draw, but we're still avoiding
+          * the register loads with MI commands.
+          */
+         if (aligned_stride)
+            break;
+#else
+         unreachable("EXECUTE_INDIRECT_DRAW instruction expectation mismatch");
 #endif
-         prim.IndirectParameterEnable  = true;
-         prim.PredicateEnable          = cmd_buffer->state.conditional_render_enabled;
-         prim.VertexAccessType         = indexed ? RANDOM : SEQUENTIAL;
+      } else {
+         load_indirect_parameters(cmd_buffer, draw, indexed, i);
+
+         genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
+         anv_batch_emit(&cmd_buffer->batch, _3DPRIMITIVE_DIRECT, prim) {
+#if GFX_VERx10 >= 125
+            prim.TBIMREnable = cmd_buffer->state.gfx.dyn_state.use_tbimr;
+#endif
+            prim.IndirectParameterEnable  = true;
+            prim.PredicateEnable          = cmd_buffer->state.conditional_render_enabled;
+            prim.VertexAccessType         = indexed ? RANDOM : SEQUENTIAL;
 #if GFX_VER >= 11
-         prim.ExtendedParametersPresent = true;
+            prim.ExtendedParametersPresent = true;
 #endif
+         }
       }
 
       genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch,

Reply via email to