Module: Mesa
Branch: main
Commit: 5cb20b5edc6e6fcde49490bb6ac832023d001e5b
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5cb20b5edc6e6fcde49490bb6ac832023d001e5b

Author: Sviatoslav Peleshko <sviatoslav.peles...@globallogic.com>
Date:   Thu Nov 30 00:42:07 2023 +0200

anv: Fix MI_ARB_CHECK calls in generated indirect draws optimization

According to PRMs, to use self-modifying code correctly we have to
disable preparser before jumping to the generated commands, and re-enable
it with a first command in that buffer.

Old implementation did it wrong: for both inplace and inring generation
it disabled preparser before running the generation shader, had it
disabled during generation, and re-enabled it just before jumping to
the generated commands.

This usually didn't cause any trouble, because the generation shader and
generated draws are in different BOs, and the jump distance is greater than
the command FIFO depth. But we allocate them from the same pool,
so there are rare cases where the end of the BO with generation commands,
and the beginning of the BO with generated draws are adjacent. In such
cases, the wrong commands might be fetched.

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10162
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peles...@globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26427>

---

 .../vulkan/genX_cmd_draw_generated_indirect.h      | 68 +++++++++++++---------
 1 file changed, 40 insertions(+), 28 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h 
b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
index 217124400d8..b3c9db48932 100644
--- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
+++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
@@ -119,13 +119,6 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer 
*cmd_buffer,
 static void
 genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer 
*cmd_buffer)
 {
-#if GFX_VER >= 12
-   anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
-      arb.PreParserDisableMask = true;
-      arb.PreParserDisable = true;
-   }
-#endif
-
    anv_batch_emit_ensure_space(&cmd_buffer->generation.batch, 4);
 
    trace_intel_begin_generate_draws(&cmd_buffer->trace);
@@ -138,6 +131,13 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct 
anv_cmd_buffer *cmd_b
 
    cmd_buffer->generation.return_addr = 
anv_batch_current_address(&cmd_buffer->batch);
 
+#if GFX_VER >= 12
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
+      arb.PreParserDisableMask = true;
+      arb.PreParserDisable = false;
+   }
+#endif
+
    trace_intel_end_generate_draws(&cmd_buffer->trace);
 
    struct anv_device *device = cmd_buffer->device;
@@ -364,6 +364,9 @@ 
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
 
    if (cmd_buffer->generation.ring_bo == NULL) {
       const uint32_t bo_size = align(
+#if GFX_VER >= 12
+         GENX(MI_ARB_CHECK_length) * 4 +
+#endif
          draw_cmd_stride * MAX_RING_BO_ITEMS +
 #if GFX_VER == 9
          4 * MAX_RING_BO_ITEMS +
@@ -386,6 +389,8 @@ 
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
    /* The ring bo has the following layout:
     *
     *   --------------------------------------------------
+    *   | MI_ARB_CHECK to resume CS prefetch (Gfx12+)    |
+    *   |------------------------------------------------|
     *   |            ring_count * 3DPRIMITIVE            |
     *   |------------------------------------------------|
     *   | jump instruction (either back to generate more |
@@ -401,6 +406,22 @@ 
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
                 GENX(MI_BATCH_BUFFER_START_length) * 4,
    };
 
+   struct anv_address draw_cmds_addr = (struct anv_address) {
+      .bo = cmd_buffer->generation.ring_bo,
+#if GFX_VER >= 12
+      .offset = GENX(MI_ARB_CHECK_length) * 4,
+#endif
+   };
+
+#if GFX_VER >= 12
+   struct GENX(MI_ARB_CHECK) resume_prefetch = {
+      .PreParserDisableMask = true,
+      .PreParserDisable = false,
+   };
+   GENX(MI_ARB_CHECK_pack)(NULL, cmd_buffer->generation.ring_bo->map,
+                           &resume_prefetch);
+#endif
+
 #if GFX_VER == 9
    /* Mark the VB-0 as using the entire ring_bo, but only for the draw call
     * starting the generation batch. All the following ones will use the same
@@ -448,16 +469,6 @@ 
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
     */
    struct anv_address gen_addr = anv_batch_current_address(&cmd_buffer->batch);
 
-#if GFX_VER >= 12
-   /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
-    * as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
-    */
-   anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
-      arb.PreParserDisableMask = true;
-      arb.PreParserDisable = true;
-   }
-#endif
-
    struct anv_simple_shader simple_state = (struct anv_simple_shader) {
       .device               = device,
       .cmd_buffer           = cmd_buffer,
@@ -474,9 +485,7 @@ 
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
       genX(cmd_buffer_emit_generate_draws)(
          cmd_buffer,
          &simple_state,
-         (struct anv_address) {
-            .bo = cmd_buffer->generation.ring_bo,
-         },
+         draw_cmds_addr,
          draw_cmd_stride,
          indirect_data_addr,
          indirect_data_stride,
@@ -497,13 +506,6 @@ 
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
                              ANV_PIPE_CS_STALL_BIT,
                              "after generation flush");
 
-#if GFX_VER >= 12
-   anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
-      arb.PreParserDisableMask = true;
-      arb.PreParserDisable = false;
-   }
-#endif
-
    trace_intel_end_generate_draws(&cmd_buffer->trace);
 
    if (cmd_buffer->state.conditional_render_enabled)
@@ -513,6 +515,16 @@ 
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
    genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
 
    if (max_draw_count > 0) {
+#if GFX_VER >= 12
+      /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
+       * as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
+         arb.PreParserDisableMask = true;
+         arb.PreParserDisable = true;
+      }
+#endif
+
       /* Jump into the ring buffer. */
       anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
          bbs.AddressSpaceIndicator = ASI_PPGTT;
@@ -650,7 +662,7 @@ genX(cmd_buffer_flush_generated_draws)(struct 
anv_cmd_buffer *cmd_buffer)
 #if GFX_VER >= 12
    anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
       arb.PreParserDisableMask = true;
-      arb.PreParserDisable = false;
+      arb.PreParserDisable = true;
    }
 #else
    /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter

Reply via email to