Module: Mesa Branch: main Commit: 5cb20b5edc6e6fcde49490bb6ac832023d001e5b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5cb20b5edc6e6fcde49490bb6ac832023d001e5b
Author: Sviatoslav Peleshko <sviatoslav.peles...@globallogic.com> Date: Thu Nov 30 00:42:07 2023 +0200 anv: Fix MI_ARB_CHECK calls in generated indirect draws optimization According to PRMs, to use self-modifying code correctly we have to disable preparser before jumping to the generated commands, and re-enable it with a first command in that buffer. Old implementation did it wrong: for both inplace and inring generation it disabled preparser before running the generation shader, had it disabled during generation, and re-enabled it just before jumping to the generated commands. This usually didn't cause any trouble, because the generation shader and generated draws are in different BOs, and the jump distance is greater than the command FIFO depth. But we allocate them from the same pool, so there are rare cases where the end of the BO with generation commands, and the beginning of the BO with generated draws are adjacent. In such cases, the wrong commands might be fetched. Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10162 Signed-off-by: Sviatoslav Peleshko <sviatoslav.peles...@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26427> --- .../vulkan/genX_cmd_draw_generated_indirect.h | 68 +++++++++++++--------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index 217124400d8..b3c9db48932 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -119,13 +119,6 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, static void genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer) { -#if GFX_VER >= 12 - anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { - arb.PreParserDisableMask = true; - arb.PreParserDisable = true; - } -#endif - anv_batch_emit_ensure_space(&cmd_buffer->generation.batch, 4); trace_intel_begin_generate_draws(&cmd_buffer->trace); @@ -138,6 +131,13 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b cmd_buffer->generation.return_addr = anv_batch_current_address(&cmd_buffer->batch); +#if GFX_VER >= 12 + anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = false; + } +#endif + trace_intel_end_generate_draws(&cmd_buffer->trace); struct anv_device *device = cmd_buffer->device; @@ -364,6 +364,9 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd if (cmd_buffer->generation.ring_bo == NULL) { const uint32_t bo_size = align( +#if GFX_VER >= 12 + GENX(MI_ARB_CHECK_length) * 4 + +#endif draw_cmd_stride * MAX_RING_BO_ITEMS + #if GFX_VER == 9 4 * MAX_RING_BO_ITEMS + @@ -386,6 +389,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd /* The ring bo has the following layout: * * -------------------------------------------------- + * | MI_ARB_CHECK to resume CS prefetch (Gfx12+) | + * |------------------------------------------------| * | ring_count * 3DPRIMITIVE | * |------------------------------------------------| * | jump instruction (either back to generate more | @@ -401,6 +406,22 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd GENX(MI_BATCH_BUFFER_START_length) * 4, }; + struct anv_address draw_cmds_addr = (struct anv_address) { + .bo = cmd_buffer->generation.ring_bo, +#if GFX_VER >= 12 + .offset = GENX(MI_ARB_CHECK_length) * 4, +#endif + }; + +#if GFX_VER >= 12 + struct GENX(MI_ARB_CHECK) resume_prefetch = { + .PreParserDisableMask = true, + .PreParserDisable = false, + }; + GENX(MI_ARB_CHECK_pack)(NULL, cmd_buffer->generation.ring_bo->map, + &resume_prefetch); +#endif + #if GFX_VER == 9 /* Mark the VB-0 as using the entire ring_bo, but only for the draw call * starting the generation batch. All the following ones will use the same @@ -448,16 +469,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd */ struct anv_address gen_addr = anv_batch_current_address(&cmd_buffer->batch); -#if GFX_VER >= 12 - /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter - * as the prefetch shouldn't follow the MI_BATCH_BUFFER_START. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { - arb.PreParserDisableMask = true; - arb.PreParserDisable = true; - } -#endif - struct anv_simple_shader simple_state = (struct anv_simple_shader) { .device = device, .cmd_buffer = cmd_buffer, @@ -474,9 +485,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd genX(cmd_buffer_emit_generate_draws)( cmd_buffer, &simple_state, - (struct anv_address) { - .bo = cmd_buffer->generation.ring_bo, - }, + draw_cmds_addr, draw_cmd_stride, indirect_data_addr, indirect_data_stride, @@ -497,13 +506,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd ANV_PIPE_CS_STALL_BIT, "after generation flush"); -#if GFX_VER >= 12 - anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { - arb.PreParserDisableMask = true; - arb.PreParserDisable = false; - } -#endif - trace_intel_end_generate_draws(&cmd_buffer->trace); if (cmd_buffer->state.conditional_render_enabled) @@ -513,6 +515,16 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd genX(cmd_buffer_flush_gfx_state)(cmd_buffer); if (max_draw_count > 0) { +#if GFX_VER >= 12 + /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter + * as the prefetch shouldn't follow the MI_BATCH_BUFFER_START. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = true; + } +#endif + /* Jump into the ring buffer. */ anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) { bbs.AddressSpaceIndicator = ASI_PPGTT; @@ -650,7 +662,7 @@ genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer) #if GFX_VER >= 12 anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) { arb.PreParserDisableMask = true; - arb.PreParserDisable = false; + arb.PreParserDisable = true; } #else /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter