Module: Mesa Branch: main Commit: 76960e2d93501348a4ef4d65a26120ea764682b9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=76960e2d93501348a4ef4d65a26120ea764682b9
Author: Samuel Pitoiset <[email protected]> Date: Mon Aug 29 18:35:52 2022 +0200 radv: move emitting GE_CNTL for non-NGG pipelines from the cmdbuf GE_CNTL is the equivalent of IA_MULTI_VGT_PARAM on GFX9 and older. Calling this function for every draw shouldn't really hurt in practice because only non-NGG pipelines need this. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Timur Kristóf <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18344> --- src/amd/vulkan/radv_cmd_buffer.c | 46 +++++++++++++++++++++++++++++++++++++++- src/amd/vulkan/radv_pipeline.c | 35 ------------------------------ src/amd/vulkan/radv_private.h | 1 + 3 files changed, 46 insertions(+), 36 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 852ceb9735c..de4a9acd7d0 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -4010,6 +4010,44 @@ si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dr } } +static void +gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer) +{ + const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; + struct radv_cmd_state *state = &cmd_buffer->state; + bool break_wave_at_eoi = false; + unsigned primgroup_size; + unsigned ge_cntl; + + if (pipeline->is_ngg) + return; + + if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { + primgroup_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches; + + if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || + radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.uses_prim_id) { + break_wave_at_eoi = true; + } + } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { + const struct gfx9_gs_info *gs_state = + &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info; + primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(gs_state->vgt_gs_onchip_cntl); + } else { + primgroup_size = 128; /* recommended without a GS and tess */ + } + + ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) | + S_03096C_VERT_GRP_SIZE(256) | /* disable vertex grouping */ + S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ | + S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi); + + if (state->last_ge_cntl != ge_cntl) { + radeon_set_uconfig_reg(cmd_buffer->cs, R_03096C_GE_CNTL, ge_cntl); + state->last_ge_cntl = ge_cntl; + } +} + static void radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) { @@ -4020,7 +4058,9 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d bool disable_instance_packing = false; /* Draw state. */ - if (info->gfx_level < GFX10) { + if (info->gfx_level >= GFX10) { + gfx10_emit_ge_cntl(cmd_buffer); + } else { si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect, !!draw_info->strmout_buffer, draw_info->indirect ? 0 : draw_info->count); @@ -5707,6 +5747,10 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param; } + if (secondary->state.last_ge_cntl) { + primary->state.last_ge_cntl = secondary->state.last_ge_cntl; + } + primary->state.last_first_instance = secondary->state.last_first_instance; primary->state.last_num_instances = secondary->state.last_num_instances; primary->state.last_drawid = secondary->state.last_drawid; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 4949e69177c..bf8ae5a710a 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -5624,38 +5624,6 @@ radv_pipeline_emit_cliprect_rule(struct radeon_cmdbuf *ctx_cs, radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule); } -static void -gfx10_pipeline_emit_ge_cntl(struct radeon_cmdbuf *ctx_cs, - const struct radv_graphics_pipeline *pipeline) -{ - bool break_wave_at_eoi = false; - unsigned primgroup_size; - unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */ - - if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { - primgroup_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches; - } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { - const struct gfx9_gs_info *gs_state = - &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info; - unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl; - primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl); - } else { - primgroup_size = 128; /* recommended without a GS and tess */ - } - - if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { - if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || - radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.uses_prim_id) - break_wave_at_eoi = true; - } - - radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, - S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) | - S_03096C_VERT_GRP_SIZE(vertgroup_size) | - S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ | - S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi)); -} - static void radv_pipeline_emit_vgt_gs_out(struct radeon_cmdbuf *ctx_cs, const struct radv_graphics_pipeline *pipeline, @@ -5792,9 +5760,6 @@ radv_pipeline_emit_pm4(struct radv_graphics_pipeline *pipeline, radv_pipeline_emit_cliprect_rule(ctx_cs, state); radv_pipeline_emit_vgt_gs_out(ctx_cs, pipeline, vgt_gs_out_prim_type); - if (pdevice->rad_info.gfx_level >= GFX10 && !radv_pipeline_has_ngg(pipeline)) - gfx10_pipeline_emit_ge_cntl(ctx_cs, pipeline); - if (pdevice->rad_info.gfx_level >= GFX10_3) { gfx103_pipeline_emit_vgt_draw_payload_cntl(ctx_cs, pipeline, state); gfx103_pipeline_emit_vrs_state(ctx_cs, pipeline, state); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 5c410c8fc43..3e536488de0 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1487,6 +1487,7 @@ struct radv_cmd_state { bool prims_gen_query_enabled; uint32_t trace_id; uint32_t last_ia_multi_vgt_param; + uint32_t last_ge_cntl; uint32_t last_num_instances; uint32_t last_first_instance;
