From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 185 ++++++++++++++++++++++++-------
 1 file changed, 147 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index fbeb265..8b21ff7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5616,114 +5616,220 @@ static unsigned si_get_max_workgroup_size(struct 
si_shader *shader)
 
        if (!max_work_group_size) {
                /* This is a variable group size compute shader,
                 * compile it for the maximum possible group size.
                 */
                max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
        }
        return max_work_group_size;
 }
 
+static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
+                                           LLVMTypeRef *params,
+                                           unsigned *num_params,
+                                           bool assign_params)
+{
+       params[(*num_params)++] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
+       params[(*num_params)++] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
+       params[(*num_params)++] = const_array(ctx->v8i32, SI_NUM_IMAGES);
+       params[(*num_params)++] = const_array(ctx->v4i32, 
SI_NUM_SHADER_BUFFERS);
+
+       if (assign_params) {
+               ctx->param_const_buffers  = *num_params - 4;
+               ctx->param_samplers       = *num_params - 3;
+               ctx->param_images         = *num_params - 2;
+               ctx->param_shader_buffers = *num_params - 1;
+       }
+}
+
+static void declare_default_desc_pointers(struct si_shader_context *ctx,
+                                         LLVMTypeRef *params,
+                                         unsigned *num_params)
+{
+       params[ctx->param_rw_buffers = (*num_params)++] =
+               const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+       declare_per_stage_desc_pointers(ctx, params, num_params, true);
+}
+
+static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
+                                           LLVMTypeRef *params,
+                                           unsigned *num_params)
+{
+       params[ctx->param_vertex_buffers = (*num_params)++] =
+               const_array(ctx->v16i8, SI_NUM_VERTEX_BUFFERS);
+       params[ctx->param_base_vertex = (*num_params)++] = ctx->i32;
+       params[ctx->param_start_instance = (*num_params)++] = ctx->i32;
+       params[ctx->param_draw_id = (*num_params)++] = ctx->i32;
+       params[ctx->param_vs_state_bits = (*num_params)++] = ctx->i32;
+}
+
+static void declare_vs_input_vgprs(struct si_shader_context *ctx,
+                                  LLVMTypeRef *params, unsigned *num_params,
+                                  unsigned *num_prolog_vgprs)
+{
+       struct si_shader *shader = ctx->shader;
+
+       params[ctx->param_vertex_id = (*num_params)++] = ctx->i32;
+       params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32;
+       params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32;
+       params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
+
+       if (!shader->is_gs_copy_shader) {
+               /* Vertex load indices. */
+               ctx->param_vertex_index0 = (*num_params);
+               for (unsigned i = 0; i < shader->selector->info.num_inputs; i++)
+                       params[(*num_params)++] = ctx->i32;
+               *num_prolog_vgprs += shader->selector->info.num_inputs;
+       }
+}
+
+enum {
+       /* Convenient merged shader definitions. */
+       SI_SHADER_MERGED_VERTEX_TESSCTRL = PIPE_SHADER_TYPES,
+       SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY,
+};
+
 static void create_function(struct si_shader_context *ctx)
 {
        struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
        struct gallivm_state *gallivm = &ctx->gallivm;
        struct si_shader *shader = ctx->shader;
-       LLVMTypeRef params[SI_NUM_PARAMS + SI_MAX_ATTRIBS], v3i32;
+       LLVMTypeRef params[100]; /* just make it large enough */
        LLVMTypeRef returns[16+32*4];
        unsigned i, last_sgpr, num_params = 0, num_return_sgprs;
        unsigned num_returns = 0;
        unsigned num_prolog_vgprs = 0;
+       unsigned type = ctx->type;
 
-       v3i32 = LLVMVectorType(ctx->i32, 3);
+       /* Set MERGED shaders. */
+       if (ctx->screen->b.chip_class >= GFX9) {
+               if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL)
+                       type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
+               else if (shader->key.as_es || type == PIPE_SHADER_GEOMETRY)
+                       type = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
+       }
 
-       params[ctx->param_rw_buffers = num_params++] =
-               const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
-       params[ctx->param_const_buffers = num_params++] =
-               const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
-       params[ctx->param_samplers = num_params++] =
-               const_array(ctx->v8i32, SI_NUM_SAMPLERS);
-       params[ctx->param_images = num_params++] =
-               const_array(ctx->v8i32, SI_NUM_IMAGES);
-       params[ctx->param_shader_buffers = num_params++] =
-               const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
+       LLVMTypeRef v3i32 = LLVMVectorType(ctx->i32, 3);
 
-       switch (ctx->type) {
+       switch (type) {
        case PIPE_SHADER_VERTEX:
-               params[ctx->param_vertex_buffers = num_params++] =
-                       const_array(ctx->v16i8, SI_NUM_VERTEX_BUFFERS);
-               params[ctx->param_base_vertex = num_params++] = ctx->i32;
-               params[ctx->param_start_instance = num_params++] = ctx->i32;
-               params[ctx->param_draw_id = num_params++] = ctx->i32;
-               params[ctx->param_vs_state_bits = num_params++] = ctx->i32;
+               declare_default_desc_pointers(ctx, params, &num_params);
+               declare_vs_specific_input_sgprs(ctx, params, &num_params);
 
                if (shader->key.as_es) {
                        params[ctx->param_es2gs_offset = num_params++] = 
ctx->i32;
                } else if (shader->key.as_ls) {
                        /* no extra parameters */
                } else {
                        if (shader->is_gs_copy_shader)
                                num_params = ctx->param_rw_buffers + 1;
 
                        /* The locations of the other parameters are assigned 
dynamically. */
                        declare_streamout_params(ctx, &shader->selector->so,
                                                 params, ctx->i32, &num_params);
                }
 
                last_sgpr = num_params-1;
 
                /* VGPRs */
-               params[ctx->param_vertex_id = num_params++] = ctx->i32;
-               params[ctx->param_rel_auto_id = num_params++] = ctx->i32;
-               params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
-               params[ctx->param_instance_id = num_params++] = ctx->i32;
-
-               if (!shader->is_gs_copy_shader) {
-                       /* Vertex load indices. */
-                       ctx->param_vertex_index0 = num_params;
+               declare_vs_input_vgprs(ctx, params, &num_params,
+                                      &num_prolog_vgprs);
 
-                       for (i = 0; i < shader->selector->info.num_inputs; i++)
-                               params[num_params++] = ctx->i32;
-
-                       num_prolog_vgprs += shader->selector->info.num_inputs;
-
-                       /* PrimitiveID output. */
-                       if (!shader->key.as_es && !shader->key.as_ls)
-                               for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
-                                       returns[num_returns++] = ctx->f32;
+               /* PrimitiveID output. */
+               if (!shader->is_gs_copy_shader &&
+                   !shader->key.as_es && !shader->key.as_ls) {
+                       for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+                               returns[num_returns++] = ctx->f32;
                }
                break;
 
-       case PIPE_SHADER_TESS_CTRL:
+       case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */
+               declare_default_desc_pointers(ctx, params, &num_params);
                params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
                params[ctx->param_tcs_out_lds_offsets = num_params++] = 
ctx->i32;
                params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
                params[ctx->param_vs_state_bits = num_params++] = ctx->i32;
                params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
                params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
                last_sgpr = num_params - 1;
 
                /* VGPRs */
                params[ctx->param_tcs_patch_id = num_params++] = ctx->i32;
                params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32;
 
                /* param_tcs_offchip_offset and param_tcs_factor_offset are
                 * placed after the user SGPRs.
                 */
                for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
                        returns[num_returns++] = ctx->i32; /* SGPRs */
-
                for (i = 0; i < 3; i++)
                        returns[num_returns++] = ctx->f32; /* VGPRs */
                break;
 
+       case SI_SHADER_MERGED_VERTEX_TESSCTRL:
+               /* Merged stages have 8 system SGPRs at the beginning. */
+               params[num_params++] = ctx->i32; /* unused */
+               params[num_params++] = ctx->i32; /* unused */
+               params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
+               params[num_params++] = ctx->i32; /* wave thread counts for LS 
and HS */
+               params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
+               params[num_params++] = ctx->i32; /* scratch wave offset */
+               params[num_params++] = ctx->i32; /* unused */
+               params[num_params++] = ctx->i32; /* unused */
+
+               params[ctx->param_rw_buffers = num_params++] =
+                       const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+               declare_per_stage_desc_pointers(ctx, params, &num_params,
+                                               ctx->type == 
PIPE_SHADER_VERTEX);
+               declare_vs_specific_input_sgprs(ctx, params, &num_params);
+
+               params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
+               params[ctx->param_tcs_out_lds_offsets = num_params++] = 
ctx->i32;
+               params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
+               params[num_params++] = ctx->i32; /* unused */
+
+               declare_per_stage_desc_pointers(ctx, params, &num_params,
+                                               ctx->type == 
PIPE_SHADER_TESS_CTRL);
+               last_sgpr = num_params - 1;
+
+               /* VGPRs (first TCS, then VS) */
+               params[ctx->param_tcs_patch_id = num_params++] = ctx->i32;
+               params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32;
+
+               if (ctx->type == PIPE_SHADER_VERTEX) {
+                       declare_vs_input_vgprs(ctx, params, &num_params,
+                                              &num_prolog_vgprs);
+
+                       /* LS return values are inputs to the TCS main shader 
part. */
+                       for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++)
+                               returns[num_returns++] = ctx->i32; /* SGPRs */
+                       for (i = 0; i < 2; i++)
+                               returns[num_returns++] = ctx->f32; /* VGPRs */
+               } else {
+                       /* TCS return values are inputs to the TCS epilog.
+                        *
+                        * param_tcs_offchip_offset and param_tcs_factor_offset
+                        * should be passed to the epilog.
+                        */
+                       for (i = 0; i <= ctx->param_tcs_factor_offset; i++)
+                               returns[num_returns++] = ctx->i32; /* SGPRs */
+                       for (i = 0; i < 3; i++)
+                               returns[num_returns++] = ctx->f32; /* VGPRs */
+               }
+               break;
+
+       case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
+               assert(!"unimplemented merged ES-GS shader");
+               break;
+
        case PIPE_SHADER_TESS_EVAL:
+               declare_default_desc_pointers(ctx, params, &num_params);
                params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
 
                if (shader->key.as_es) {
                        params[ctx->param_tcs_offchip_offset = num_params++] = 
ctx->i32;
                        params[num_params++] = ctx->i32;
                        params[ctx->param_es2gs_offset = num_params++] = 
ctx->i32;
                } else {
                        params[num_params++] = ctx->i32;
                        declare_streamout_params(ctx, &shader->selector->so,
                                                 params, ctx->i32, &num_params);
@@ -5737,36 +5843,38 @@ static void create_function(struct si_shader_context 
*ctx)
                params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32;
                params[ctx->param_tes_patch_id = num_params++] = ctx->i32;
 
                /* PrimitiveID output. */
                if (!shader->key.as_es)
                        for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
                                returns[num_returns++] = ctx->f32;
                break;
 
        case PIPE_SHADER_GEOMETRY:
+               declare_default_desc_pointers(ctx, params, &num_params);
                params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
                params[ctx->param_gs_wave_id = num_params++] = ctx->i32;
                last_sgpr = num_params - 1;
 
                /* VGPRs */
                params[ctx->param_gs_vtx0_offset = num_params++] = ctx->i32;
                params[ctx->param_gs_vtx1_offset = num_params++] = ctx->i32;
                params[ctx->param_gs_prim_id = num_params++] = ctx->i32;
                params[ctx->param_gs_vtx2_offset = num_params++] = ctx->i32;
                params[ctx->param_gs_vtx3_offset = num_params++] = ctx->i32;
                params[ctx->param_gs_vtx4_offset = num_params++] = ctx->i32;
                params[ctx->param_gs_vtx5_offset = num_params++] = ctx->i32;
                params[ctx->param_gs_instance_id = num_params++] = ctx->i32;
                break;
 
        case PIPE_SHADER_FRAGMENT:
+               declare_default_desc_pointers(ctx, params, &num_params);
                params[SI_PARAM_ALPHA_REF] = ctx->f32;
                params[SI_PARAM_PRIM_MASK] = ctx->i32;
                last_sgpr = SI_PARAM_PRIM_MASK;
                params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32;
                params[SI_PARAM_PERSP_CENTER] = ctx->v2i32;
                params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32;
                params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
                params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
                params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
                params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
@@ -5808,20 +5916,21 @@ static void create_function(struct si_shader_context 
*ctx)
                                   num_return_sgprs +
                                   PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
 
                for (i = 0; i < num_return_sgprs; i++)
                        returns[i] = ctx->i32;
                for (; i < num_returns; i++)
                        returns[i] = ctx->f32;
                break;
 
        case PIPE_SHADER_COMPUTE:
+               declare_default_desc_pointers(ctx, params, &num_params);
                params[SI_PARAM_GRID_SIZE] = v3i32;
                params[SI_PARAM_BLOCK_SIZE] = v3i32;
                params[SI_PARAM_BLOCK_ID] = v3i32;
                last_sgpr = SI_PARAM_BLOCK_ID;
 
                params[SI_PARAM_THREAD_ID] = v3i32;
                num_params = SI_PARAM_THREAD_ID + 1;
                break;
        default:
                assert(0 && "unimplemented shader");
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to