Module: Mesa
Branch: main
Commit: 130428e758557872a16121c1aba4401b23e3d15d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=130428e758557872a16121c1aba4401b23e3d15d

Author: Marek Olšák <marek.ol...@amd.com>
Date:   Sat Nov 18 22:59:55 2023 -0500

radeonsi: don't allocate output space for LAYER/VIEWPORT before TES and GS

The outputs are ignored according to GL_ARB_shader_viewport_layer_array.

Reviewed-by: Qiang Yu <yuq...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26274>

---

 src/gallium/drivers/radeonsi/si_shader.c           | 10 +++++-----
 src/gallium/drivers/radeonsi/si_shader.h           |  2 +-
 src/gallium/drivers/radeonsi/si_shader_info.c      | 13 +++++++++----
 src/gallium/drivers/radeonsi/si_shader_llvm_tess.c |  3 +++
 src/gallium/drivers/radeonsi/si_state_shaders.cpp  |  4 ++--
 5 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 7b9e80b3cf1..8c0b9f4fbed 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -474,7 +474,7 @@ void si_init_shader_args(struct si_shader *shader, struct 
si_shader_args *args)
 
             /* VS outputs passed via VGPRs to TCS. */
             if (shader->key.ge.opt.same_patch_vertices && 
!sel->screen->use_aco) {
-               unsigned num_outputs = 
util_last_bit64(shader->selector->info.outputs_written);
+               unsigned num_outputs = 
util_last_bit64(shader->selector->info.outputs_written_before_tes_gs);
                for (i = 0; i < num_outputs * 4; i++)
                   ac_add_return(&args->ac, AC_ARG_VGPR);
             }
@@ -482,7 +482,7 @@ void si_init_shader_args(struct si_shader *shader, struct 
si_shader_args *args)
       } else {
          /* TCS inputs are passed via VGPRs from VS. */
          if (shader->key.ge.opt.same_patch_vertices && !sel->screen->use_aco) {
-            unsigned num_inputs = 
util_last_bit64(shader->previous_stage_sel->info.outputs_written);
+            unsigned num_inputs = 
util_last_bit64(shader->previous_stage_sel->info.outputs_written_before_tes_gs);
             for (i = 0; i < num_inputs * 4; i++)
                ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
          }
@@ -1270,7 +1270,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen 
*screen, struct si_shad
                shader->key.ge.as_ngg)
          num_outputs = shader->info.nr_param_exports;
       else if (shader->selector->stage == MESA_SHADER_TESS_CTRL)
-         num_outputs = util_last_bit64(shader->selector->info.outputs_written);
+         num_outputs = 
util_last_bit64(shader->selector->info.outputs_written_before_tes_gs);
       else
          unreachable("invalid shader key");
    } else if (shader->selector->stage == MESA_SHADER_FRAGMENT) {
@@ -1794,7 +1794,7 @@ static bool si_lower_io_to_mem(struct si_shader *shader, 
nir_shader *nir,
                  /* Used by hs_emit_write_tess_factors() when monolithic 
shader. */
                  key->ge.part.tcs.epilog.tes_reads_tess_factors,
                  ~0ULL, ~0ULL, /* no TES inputs filter */
-                 util_last_bit64(sel->info.outputs_written),
+                 util_last_bit64(sel->info.outputs_written_before_tes_gs),
                  util_last_bit64(sel->info.patch_outputs_written),
                  shader->wave_size,
                  /* ALL TCS inputs are passed by register. */
@@ -3532,7 +3532,7 @@ nir_shader *si_get_prev_stage_nir_shader(struct si_shader 
*shader,
 unsigned si_get_tcs_out_patch_stride(const struct si_shader_info *info)
 {
    unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;
-   unsigned vertex_stride = util_last_bit64(info->outputs_written) * 4;
+   unsigned vertex_stride = 
util_last_bit64(info->outputs_written_before_tes_gs) * 4;
    unsigned num_patch_outputs = util_last_bit64(info->patch_outputs_written);
 
    return tcs_out_vertices * vertex_stride + num_patch_outputs * 4;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 95038c70dba..cd59fe01bef 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -436,8 +436,8 @@ struct si_shader_info {
    uint64_t inputs_read; /* "get_unique_index" bits */
    uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not 
LDS. */
 
+   uint64_t outputs_written_before_tes_gs; /* "get_unique_index" bits */
    uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
-   uint64_t outputs_written;           /* "get_unique_index" bits */
    uint32_t patch_outputs_written;     /* "get_unique_index_patch" bits */
 
    uint8_t clipdist_mask;
diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c 
b/src/gallium/drivers/radeonsi/si_shader_info.c
index 84613c993cb..d0585ed6c25 100644
--- a/src/gallium/drivers/radeonsi/si_shader_info.c
+++ b/src/gallium/drivers/radeonsi/si_shader_info.c
@@ -745,8 +745,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, const 
struct nir_shader *nir,
             info->patch_outputs_written |= 1ull << 
ac_shader_io_get_unique_index_patch(semantic);
          } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= 
VARYING_SLOT_VAR0_16BIT) &&
                     semantic != VARYING_SLOT_EDGE) {
-            info->outputs_written |= 1ull << 
si_shader_io_get_unique_index(semantic);
-
             /* Ignore outputs that are not passed from VS to PS. */
             if (semantic != VARYING_SLOT_POS &&
                 semantic != VARYING_SLOT_PSIZ &&
@@ -754,6 +752,13 @@ void si_nir_scan_shader(struct si_screen *sscreen, const 
struct nir_shader *nir,
                info->outputs_written_before_ps |= 1ull
                                                   << 
si_shader_io_get_unique_index(semantic);
             }
+
+            /* LAYER and VIEWPORT have no effect if they don't feed the 
rasterizer. */
+            if (semantic != VARYING_SLOT_LAYER &&
+                semantic != VARYING_SLOT_VIEWPORT) {
+               info->outputs_written_before_tes_gs |=
+                  BITFIELD64_BIT(si_shader_io_get_unique_index(semantic));
+            }
          }
       }
    }
@@ -771,8 +776,8 @@ void si_nir_scan_shader(struct si_screen *sscreen, const 
struct nir_shader *nir,
    if (nir->info.stage == MESA_SHADER_VERTEX ||
        nir->info.stage == MESA_SHADER_TESS_CTRL ||
        nir->info.stage == MESA_SHADER_TESS_EVAL) {
-      info->esgs_vertex_stride = util_last_bit64(info->outputs_written) * 16;
-      info->lshs_vertex_stride = info->esgs_vertex_stride;
+      info->esgs_vertex_stride = info->lshs_vertex_stride =
+         util_last_bit64(info->outputs_written_before_tes_gs) * 16;
 
       /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
        * will start on a different bank. (except for the maximum 32*16).
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c 
b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index ffb321e0cc0..a31f3f7e318 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -488,6 +488,9 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
          unsigned semantic = info->output_semantic[i];
          int param = si_shader_io_get_unique_index(semantic);
 
+         if (!(info->outputs_written_before_tes_gs & BITFIELD64_BIT(param)))
+            continue;
+
          for (unsigned chan = 0; chan < 4; chan++) {
             if (!(info->output_usagemask[i] & (1 << chan)))
                continue;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp 
b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 5c14d49e335..fb2c9ce28bb 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -4319,7 +4319,7 @@ bool si_set_tcs_to_fixed_func_shader(struct si_context 
*sctx)
    }
 
    struct si_fixed_func_tcs_shader_key key;
-   key.outputs_written = sctx->shader.vs.cso->info.outputs_written;
+   key.outputs_written = 
sctx->shader.vs.cso->info.outputs_written_before_tes_gs;
    key.vertices_out = sctx->patch_vertices;
 
    struct hash_entry *entry = _mesa_hash_table_search(
@@ -4445,7 +4445,7 @@ void si_update_tess_io_layout_state(struct si_context 
*sctx)
 
    /* This calculates how shader inputs and outputs among VS, TCS, and TES
     * are laid out in LDS. */
-   unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
+   unsigned num_tcs_outputs = 
util_last_bit64(tcs->info.outputs_written_before_tes_gs);
    unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
    unsigned num_tcs_patch_outputs = 
util_last_bit64(tcs->info.patch_outputs_written);
 

Reply via email to