Signed-off-by: Sonny Jiang <sonny.ji...@amd.com>
---
 src/gallium/drivers/radeonsi/si_gfx_cs.c      |  15 ++-
 src/gallium/drivers/radeonsi/si_state.h       |  19 ++-
 .../drivers/radeonsi/si_state_shaders.c       | 112 ++++++++++++------
 3 files changed, 109 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c 
b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 99376c4847..39a97bf3da 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -352,9 +352,22 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]  
= 0x3f800000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]     
= 0xffff;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE]  
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1]  
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2]  
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3]  
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE]    
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE]  
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT]  = 
0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE]  = 
0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1]  
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2]  
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3]  
= 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_INSTANCE_CNT]  = 
0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_ONCHIP_CNTL]  = 
0x00000000;
+               
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP]  = 
0x00000000;
 
                /* Set all saved registers state to saved. */
-               ctx->tracked_regs.reg_saved = 0xffffffff;
+               ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
        } else {
                /* Set all saved registers state to unknown. */
                ctx->tracked_regs.reg_saved = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 7fb09ca953..9efb4be5a5 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -279,11 +279,28 @@ enum si_tracked_reg {
 
        SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
 
+       SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 4 consecutive registers */
+       SI_TRACKED_VGT_GSVS_RING_OFFSET_2,
+       SI_TRACKED_VGT_GSVS_RING_OFFSET_3,
+       SI_TRACKED_VGT_GS_OUT_PRIM_TYPE,
+
+       SI_TRACKED_VGT_GSVS_RING_ITEMSIZE,
+       SI_TRACKED_VGT_GS_MAX_VERT_OUT,
+
+       SI_TRACKED_VGT_GS_VERT_ITEMSIZE, /* 4 consecutive registers */
+       SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1,
+       SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2,
+       SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3,
+
+       SI_TRACKED_VGT_GS_INSTANCE_CNT,
+       SI_TRACKED_VGT_GS_ONCHIP_CNTL,
+       SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
+
        SI_NUM_TRACKED_REGS,
 };
 
 struct si_tracked_regs {
-       uint32_t                reg_saved;
+       uint64_t                reg_saved;
        uint32_t                reg_value[SI_NUM_TRACKED_REGS];
        uint32_t                spi_ps_input_cntl[32];
 };
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 0716021d85..87b58cd6e5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -747,48 +747,99 @@ static void gfx9_get_gs_info(struct si_shader_selector 
*es,
        assert(out->max_prims_per_subgroup <= max_out_prims);
 }
 
-static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
+static void si_emit_shader_gs(struct si_context *sctx)
 {
+       struct si_shader *shader = sctx->queued.named.gs->shader;
+       if (!shader)
+               return;
+
        struct si_shader_selector *sel = shader->selector;
        const ubyte *num_components = sel->info.num_stream_output_components;
        unsigned gs_num_invocations = sel->gs_num_invocations;
-       struct si_pm4_state *pm4;
        uint64_t va;
        unsigned max_stream = sel->max_gs_stream;
-       unsigned offset;
+       unsigned offset_1, offset_2, offset_3, vgt_gsvs_ring_itemsize;
 
-       pm4 = si_get_shader_pm4_state(shader);
-       if (!pm4)
-               return;
-
-       offset = num_components[0] * sel->gs_max_out_vertices;
-       si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, offset);
+       offset_3 = offset_2 = offset_1 = num_components[0] * 
sel->gs_max_out_vertices;
        if (max_stream >= 1)
-               offset += num_components[1] * sel->gs_max_out_vertices;
-       si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, offset);
+               offset_3 = offset_2 += num_components[1] * 
sel->gs_max_out_vertices;
        if (max_stream >= 2)
-               offset += num_components[2] * sel->gs_max_out_vertices;
-       si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, offset);
-       si_pm4_set_reg(pm4, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
-                      si_conv_prim_to_gs_out(sel->gs_output_prim));
+               offset_3 += num_components[2] * sel->gs_max_out_vertices;
+
+
+       /* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
+        * R_028A68_VGT_GSVS_RING_OFFSET_3, R_028A6C_VGT_GS_OUT_PRIM_TYPE */
+       radeon_opt_set_context_reg4(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
+                                   SI_TRACKED_VGT_GSVS_RING_OFFSET_1,
+                                   offset_1, offset_2, offset_3,
+                                   
si_conv_prim_to_gs_out(sel->gs_output_prim));
+
+       vgt_gsvs_ring_itemsize = offset_3;
        if (max_stream >= 3)
-               offset += num_components[3] * sel->gs_max_out_vertices;
-       si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset);
+               vgt_gsvs_ring_itemsize += num_components[3] * 
sel->gs_max_out_vertices;
 
        /* The GSVS_RING_ITEMSIZE register takes 15 bits */
-       assert(offset < (1 << 15));
+       assert(vgt_gsvs_ring_itemsize < (1 << 15));
+
+       /* R_028AB0_VGT_GSVS_RING_ITEMSIZE */
+       radeon_opt_set_context_reg(sctx, R_028AB0_VGT_GSVS_RING_ITEMSIZE,
+                                  SI_TRACKED_VGT_GSVS_RING_ITEMSIZE,
+                                  vgt_gsvs_ring_itemsize);
+
+       /* R_028B38_VGT_GS_MAX_VERT_OUT */
+       radeon_opt_set_context_reg(sctx, R_028B38_VGT_GS_MAX_VERT_OUT,
+                                  SI_TRACKED_VGT_GS_MAX_VERT_OUT,
+                                  sel->gs_max_out_vertices);
+
+       /* R_028B5C_VGT_GS_VERT_ITEMSIZE, R_028B60_VGT_GS_VERT_ITEMSIZE_1
+        * R_028B64_VGT_GS_VERT_ITEMSIZE_2, R_028B68_VGT_GS_VERT_ITEMSIZE_3 */
+       radeon_opt_set_context_reg4(sctx, R_028B5C_VGT_GS_VERT_ITEMSIZE,
+                                   SI_TRACKED_VGT_GS_VERT_ITEMSIZE,
+                                    num_components[0],
+                                    (max_stream >= 1) ? num_components[1] : 0,
+                                    (max_stream >= 2) ? num_components[2] : 0,
+                                    (max_stream >= 3) ? num_components[3] : 0);
+
+       /* R_028B90_VGT_GS_INSTANCE_CNT */
+       radeon_opt_set_context_reg(sctx, R_028B90_VGT_GS_INSTANCE_CNT,
+                                  SI_TRACKED_VGT_GS_INSTANCE_CNT,
+                                  S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+                                  S_028B90_ENABLE(gs_num_invocations > 0));
 
-       si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, 
sel->gs_max_out_vertices);
+       va = shader->bo->gpu_address;
 
-       si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, num_components[0]);
-       si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 1) 
? num_components[1] : 0);
-       si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, (max_stream >= 2) 
? num_components[2] : 0);
-       si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, (max_stream >= 3) 
? num_components[3] : 0);
+       if (sctx->chip_class >= GFX9) {
+               struct gfx9_gs_info gs_info;
+               gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info);
 
-       si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
-                      S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
-                      S_028B90_ENABLE(gs_num_invocations > 0));
+               /* R_028A44_VGT_GS_ONCHIP_CNTL */
+               radeon_opt_set_context_reg(sctx, R_028A44_VGT_GS_ONCHIP_CNTL,
+                               SI_TRACKED_VGT_GS_ONCHIP_CNTL,
+                               
S_028A44_ES_VERTS_PER_SUBGRP(gs_info.es_verts_per_subgroup) |
+                               
S_028A44_GS_PRIMS_PER_SUBGRP(gs_info.gs_prims_per_subgroup) |
+                               
S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info.gs_inst_prims_in_subgroup));
+               /* R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP */
+               radeon_opt_set_context_reg(sctx, 
R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
+                               SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
+                               
S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info.max_prims_per_subgroup));
+               /* R_028AAC_VGT_ESGS_RING_ITEMSIZE */
+               radeon_opt_set_context_reg(sctx, 
R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+                               SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
+                               shader->key.part.gs.es->esgs_itemsize / 4);
+       }
+}
+
+static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
+{
+       struct si_shader_selector *sel = shader->selector;
+       struct si_pm4_state *pm4;
+       uint64_t va;
 
+       pm4 = si_get_shader_pm4_state(shader);
+       if (!pm4)
+               return;
+
+       pm4->atom.emit = si_emit_shader_gs;
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, 
RADEON_PRIO_SHADER_BINARY);
 
@@ -843,15 +894,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct 
si_shader *shader)
                               S_00B22C_LDS_SIZE(gs_info.lds_size) |
                               
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 
-               si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
-                              
S_028A44_ES_VERTS_PER_SUBGRP(gs_info.es_verts_per_subgroup) |
-                              
S_028A44_GS_PRIMS_PER_SUBGRP(gs_info.gs_prims_per_subgroup) |
-                              
S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info.gs_inst_prims_in_subgroup));
-               si_pm4_set_reg(pm4, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
-                              
S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info.max_prims_per_subgroup));
-               si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-                              shader->key.part.gs.es->esgs_itemsize / 4);
-
                if (es_type == PIPE_SHADER_TESS_EVAL)
                        si_set_tesseval_regs(sscreen, shader->key.part.gs.es, 
pm4);
 
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to