Module: Mesa Branch: main Commit: 130428e758557872a16121c1aba4401b23e3d15d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=130428e758557872a16121c1aba4401b23e3d15d
Author: Marek Olšák <marek.ol...@amd.com> Date: Sat Nov 18 22:59:55 2023 -0500 radeonsi: don't allocate output space for LAYER/VIEWPORT before TES and GS The outputs are ignored according to GL_ARB_shader_viewport_layer_array. Reviewed-by: Qiang Yu <yuq...@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26274> --- src/gallium/drivers/radeonsi/si_shader.c | 10 +++++----- src/gallium/drivers/radeonsi/si_shader.h | 2 +- src/gallium/drivers/radeonsi/si_shader_info.c | 13 +++++++++---- src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 3 +++ src/gallium/drivers/radeonsi/si_state_shaders.cpp | 4 ++-- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 7b9e80b3cf1..8c0b9f4fbed 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -474,7 +474,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) /* VS outputs passed via VGPRs to TCS. */ if (shader->key.ge.opt.same_patch_vertices && !sel->screen->use_aco) { - unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written); + unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written_before_tes_gs); for (i = 0; i < num_outputs * 4; i++) ac_add_return(&args->ac, AC_ARG_VGPR); } @@ -482,7 +482,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) } else { /* TCS inputs are passed via VGPRs from VS. 
*/ if (shader->key.ge.opt.same_patch_vertices && !sel->screen->use_aco) { - unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written); + unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written_before_tes_gs); for (i = 0; i < num_inputs * 4; i++) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); } @@ -1270,7 +1270,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad shader->key.ge.as_ngg) num_outputs = shader->info.nr_param_exports; else if (shader->selector->stage == MESA_SHADER_TESS_CTRL) - num_outputs = util_last_bit64(shader->selector->info.outputs_written); + num_outputs = util_last_bit64(shader->selector->info.outputs_written_before_tes_gs); else unreachable("invalid shader key"); } else if (shader->selector->stage == MESA_SHADER_FRAGMENT) { @@ -1794,7 +1794,7 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir, /* Used by hs_emit_write_tess_factors() when monolithic shader. */ key->ge.part.tcs.epilog.tes_reads_tess_factors, ~0ULL, ~0ULL, /* no TES inputs filter */ - util_last_bit64(sel->info.outputs_written), + util_last_bit64(sel->info.outputs_written_before_tes_gs), util_last_bit64(sel->info.patch_outputs_written), shader->wave_size, /* ALL TCS inputs are passed by register. 
*/ @@ -3532,7 +3532,7 @@ nir_shader *si_get_prev_stage_nir_shader(struct si_shader *shader, unsigned si_get_tcs_out_patch_stride(const struct si_shader_info *info) { unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out; - unsigned vertex_stride = util_last_bit64(info->outputs_written) * 4; + unsigned vertex_stride = util_last_bit64(info->outputs_written_before_tes_gs) * 4; unsigned num_patch_outputs = util_last_bit64(info->patch_outputs_written); return tcs_out_vertices * vertex_stride + num_patch_outputs * 4; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 95038c70dba..cd59fe01bef 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -436,8 +436,8 @@ struct si_shader_info { uint64_t inputs_read; /* "get_unique_index" bits */ uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */ + uint64_t outputs_written_before_tes_gs; /* "get_unique_index" bits */ uint64_t outputs_written_before_ps; /* "get_unique_index" bits */ - uint64_t outputs_written; /* "get_unique_index" bits */ uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */ uint8_t clipdist_mask; diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 84613c993cb..d0585ed6c25 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -745,8 +745,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, info->patch_outputs_written |= 1ull << ac_shader_io_get_unique_index_patch(semantic); } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) && semantic != VARYING_SLOT_EDGE) { - info->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic); - /* Ignore outputs that are not passed from VS to PS. 
*/ if (semantic != VARYING_SLOT_POS && semantic != VARYING_SLOT_PSIZ && @@ -754,6 +752,13 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, info->outputs_written_before_ps |= 1ull << si_shader_io_get_unique_index(semantic); } + + /* LAYER and VIEWPORT have no effect if they don't feed the rasterizer. */ + if (semantic != VARYING_SLOT_LAYER && + semantic != VARYING_SLOT_VIEWPORT) { + info->outputs_written_before_tes_gs |= + BITFIELD64_BIT(si_shader_io_get_unique_index(semantic)); + } } } } @@ -771,8 +776,8 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) { - info->esgs_vertex_stride = util_last_bit64(info->outputs_written) * 16; - info->lshs_vertex_stride = info->esgs_vertex_stride; + info->esgs_vertex_stride = info->lshs_vertex_stride = + util_last_bit64(info->outputs_written_before_tes_gs) * 16; /* Add 1 dword to reduce LDS bank conflicts, so that each vertex * will start on a different bank. (except for the maximum 32*16). 
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index ffb321e0cc0..a31f3f7e318 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -488,6 +488,9 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx) unsigned semantic = info->output_semantic[i]; int param = si_shader_io_get_unique_index(semantic); + if (!(info->outputs_written_before_tes_gs & BITFIELD64_BIT(param))) + continue; + for (unsigned chan = 0; chan < 4; chan++) { if (!(info->output_usagemask[i] & (1 << chan))) continue; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 5c14d49e335..fb2c9ce28bb 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4319,7 +4319,7 @@ bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx) } struct si_fixed_func_tcs_shader_key key; - key.outputs_written = sctx->shader.vs.cso->info.outputs_written; + key.outputs_written = sctx->shader.vs.cso->info.outputs_written_before_tes_gs; key.vertices_out = sctx->patch_vertices; struct hash_entry *entry = _mesa_hash_table_search( @@ -4445,7 +4445,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx) /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. */ - unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written); + unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written_before_tes_gs); unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out; unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);