From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_compute.c        | 16 +++++++++++++---
 src/gallium/drivers/radeonsi/si_compute.h        |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h           |  1 +
 src/gallium/drivers/radeonsi/si_shader.c         | 11 +++++++++++
 src/gallium/drivers/radeonsi/si_shader.h         |  7 +++++++
 .../drivers/radeonsi/si_shader_internal.h        |  1 +
 6 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index ea6fa3e999d..c5d3d5fcf02 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -121,20 +121,22 @@ static void si_create_compute_state_async(void *job, int thread_index)
                                 &program->active_samplers_and_images);
 
        program->shader.selector = &sel;
        program->shader.is_monolithic = true;
        program->uses_grid_size = sel.info.uses_grid_size;
        program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
        program->uses_bindless_images = sel.info.uses_bindless_images;
        program->reads_variable_block_size =
                sel.info.uses_block_size &&
                sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
+       program->num_cs_user_data_dwords =
+               sel.info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
 
        void *ir_binary = si_get_ir_binary(&sel);
 
        /* Try to load the shader from the shader cache. */
        mtx_lock(&sscreen->shader_cache_mutex);
 
        if (ir_binary &&
            si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
                mtx_unlock(&sscreen->shader_cache_mutex);
 
@@ -152,21 +154,22 @@ static void si_create_compute_state_async(void *job, int thread_index)
 
                        if (program->ir_type == PIPE_SHADER_IR_TGSI)
                                FREE(program->ir.tgsi);
                        program->shader.selector = NULL;
                        return;
                }
 
                bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
                unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
                                      (sel.info.uses_grid_size ? 3 : 0) +
-                                     (program->reads_variable_block_size ? 3 : 0);
+                                     (program->reads_variable_block_size ? 3 : 0) +
+                                     program->num_cs_user_data_dwords;
 
                shader->config.rsrc1 =
                        S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
                        S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
                        S_00B848_DX10_CLAMP(1) |
                        S_00B848_FLOAT_MODE(shader->config.float_mode);
 
                shader->config.rsrc2 =
                        S_00B84C_USER_SGPR(user_sgprs) |
                        S_00B84C_SCRATCH_EN(scratch_enabled) |
@@ -699,30 +702,32 @@ static bool si_upload_compute_input(struct si_context *sctx,
                radeon_emit(cs, kernel_args_va);
                radeon_emit(cs, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) |
                                S_008F04_STRIDE(0));
        }
 
        r600_resource_reference(&input_buffer, NULL);
 
        return true;
 }
 
-static void si_setup_tgsi_grid(struct si_context *sctx,
+static void si_setup_tgsi_user_data(struct si_context *sctx,
                                 const struct pipe_grid_info *info)
 {
        struct si_compute *program = sctx->cs_shader_state.program;
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
        unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 +
                                 4 * SI_NUM_RESOURCE_SGPRS;
        unsigned block_size_reg = grid_size_reg +
                                  /* 12 bytes = 3 dwords. */
                                  12 * program->uses_grid_size;
+       unsigned cs_user_data_reg = block_size_reg +
+                                   12 * program->reads_variable_block_size;
 
        if (info->indirect) {
                if (program->uses_grid_size) {
                        uint64_t base_va = r600_resource(info->indirect)->gpu_address;
                        uint64_t va = base_va + info->indirect_offset;
                        int i;
 
                        radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
                                         r600_resource(info->indirect),
                                         RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
@@ -744,20 +749,25 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
                        radeon_emit(cs, info->grid[1]);
                        radeon_emit(cs, info->grid[2]);
                }
                if (program->reads_variable_block_size) {
                        radeon_set_sh_reg_seq(cs, block_size_reg, 3);
                        radeon_emit(cs, info->block[0]);
                        radeon_emit(cs, info->block[1]);
                        radeon_emit(cs, info->block[2]);
                }
        }
+
+       if (program->num_cs_user_data_dwords) {
+               radeon_set_sh_reg_seq(cs, cs_user_data_reg, program->num_cs_user_data_dwords);
+               radeon_emit_array(cs, sctx->cs_user_data, program->num_cs_user_data_dwords);
+       }
 }
 
 static void si_emit_dispatch_packets(struct si_context *sctx,
                                      const struct pipe_grid_info *info)
 {
        struct si_screen *sscreen = sctx->screen;
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
        bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off;
        unsigned waves_per_threadgroup =
                DIV_ROUND_UP(info->block[0] * info->block[1] * info->block[2], 64);
@@ -901,21 +911,21 @@ static void si_launch_grid(
                        r600_resource(program->global_buffers[i]);
                if (!buffer) {
                        continue;
                }
                radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
                                          RADEON_USAGE_READWRITE,
                                          RADEON_PRIO_COMPUTE_GLOBAL);
        }
 
        if (program->ir_type != PIPE_SHADER_IR_NATIVE)
-               si_setup_tgsi_grid(sctx, info);
+               si_setup_tgsi_user_data(sctx, info);
 
        si_emit_dispatch_packets(sctx, info);
 
        if (unlikely(sctx->current_saved_cs)) {
                si_trace_emit(sctx);
                si_log_compute_state(sctx, sctx->log);
        }
 
        sctx->compute_is_busy = true;
        sctx->num_compute_calls++;
diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h
index ef8b4aec4df..99b501673c5 100644
--- a/src/gallium/drivers/radeonsi/si_compute.h
+++ b/src/gallium/drivers/radeonsi/si_compute.h
@@ -50,20 +50,21 @@ struct si_compute {
        unsigned private_size;
        unsigned input_size;
        struct si_shader shader;
 
        struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
        unsigned use_code_object_v2 : 1;
        unsigned uses_grid_size:1;
        unsigned uses_bindless_samplers:1;
        unsigned uses_bindless_images:1;
        bool reads_variable_block_size;
+       unsigned num_cs_user_data_dwords;
 };
 
 void si_destroy_compute(struct si_compute *program);
 
 static inline void
 si_compute_reference(struct si_compute **dst, struct si_compute *src)
 {
        if (pipe_reference(&(*dst)->reference, &src->reference))
                si_destroy_compute(*dst);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 4c3f13b84e2..100d0166f62 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -847,20 +847,21 @@ struct si_context {
        struct pipe_constant_buffer     null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
        struct pipe_resource            *esgs_ring;
        struct pipe_resource            *gsvs_ring;
        struct pipe_resource            *tess_rings;
        union pipe_color_union          *border_color_table; /* in CPU memory, any endian */
        struct r600_resource            *border_color_buffer;
        union pipe_color_union          *border_color_map; /* in VRAM (slow access), little endian */
        unsigned                        border_color_count;
        unsigned                        num_vs_blit_sgprs;
        uint32_t                        vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
+       uint32_t                        cs_user_data[4];
 
        /* Vertex and index buffers. */
        bool                            vertex_buffers_dirty;
        bool                            vertex_buffer_pointer_dirty;
        struct pipe_vertex_buffer       vertex_buffer[SI_NUM_VERTEX_BUFFERS];
 
        /* MSAA config state. */
        int                             ps_iter_samples;
        bool                            ps_uses_fbfetch;
        bool                            smoothing_enabled;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index cfd99b61601..c51e91b1d3d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2260,20 +2260,24 @@ void si_load_system_value(struct si_shader_context *ctx,
                }
                id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
                value = LLVMBuildShl(ctx->ac.builder, value, id, "");
                if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
                    decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
                        value = LLVMBuildNot(ctx->ac.builder, value, "");
                value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
                break;
        }
 
+       case TGSI_SEMANTIC_CS_USER_DATA:
+               value = LLVMGetParam(ctx->main_fn, ctx->param_cs_user_data);
+               break;
+
        default:
                assert(!"unknown system value");
                return;
        }
 
        ctx->system_values[index] = value;
 }
 
 void si_declare_compute_memory(struct si_shader_context *ctx)
 {
@@ -4941,20 +4945,27 @@ static void create_function(struct si_shader_context *ctx)
 
        case PIPE_SHADER_COMPUTE:
                declare_global_desc_pointers(ctx, &fninfo);
                declare_per_stage_desc_pointers(ctx, &fninfo, true);
                if (shader->selector->info.uses_grid_size)
                        add_arg_assign(&fninfo, ARG_SGPR, v3i32, &ctx->abi.num_work_groups);
                if (shader->selector->info.uses_block_size &&
                    shader->selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
                        ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32);
 
+               unsigned cs_user_data_dwords =
+                       shader->selector->info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
+               if (cs_user_data_dwords) {
+                       ctx->param_cs_user_data = add_arg(&fninfo, ARG_SGPR,
+                                                         LLVMVectorType(ctx->i32, cs_user_data_dwords));
+               }
+
                for (i = 0; i < 3; i++) {
                        ctx->abi.workgroup_ids[i] = NULL;
                        if (shader->selector->info.uses_block_id[i])
                                add_arg_assign(&fninfo, ARG_SGPR, ctx->i32, &ctx->abi.workgroup_ids[i]);
                }
 
                add_arg_assign(&fninfo, ARG_VGPR, v3i32, &ctx->abi.local_invocation_ids);
                break;
        default:
                assert(0 && "unimplemented shader");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 2dc4bc7e787..14230b82073 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -269,28 +269,35 @@ enum {
 #define C_VS_STATE_CLAMP_VERTEX_COLOR          0xFFFFFFFE
 #define S_VS_STATE_INDEXED(x)                  (((unsigned)(x) & 0x1) << 1)
 #define C_VS_STATE_INDEXED                     0xFFFFFFFD
 #define S_VS_STATE_LS_OUT_PATCH_SIZE(x)                (((unsigned)(x) & 0x1FFF) << 8)
 #define C_VS_STATE_LS_OUT_PATCH_SIZE           0xFFE000FF
 #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x)       (((unsigned)(x) & 0xFF) << 24)
 #define C_VS_STATE_LS_OUT_VERTEX_SIZE          0x00FFFFFF
 
 /* SI-specific system values. */
 enum {
+       /* Values from set_tess_state. */
        TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
        TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
+
+       /* Up to 4 dwords in user SGPRs for compute shaders. */
+       TGSI_SEMANTIC_CS_USER_DATA,
 };
 
 enum {
        /* Use a property enum that CS wouldn't use. */
        TGSI_PROPERTY_CS_LOCAL_SIZE = TGSI_PROPERTY_FS_COORD_ORIGIN,
 
+       /* The number of used user data dwords in the range [1, 4]; 0 means no user data is used. */
+       TGSI_PROPERTY_CS_USER_DATA_DWORDS = TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
+
        /* Use a property enum that VS wouldn't use. */
        TGSI_PROPERTY_VS_BLIT_SGPRS = TGSI_PROPERTY_FS_COORD_ORIGIN,
 
        /* These represent the number of SGPRs the shader uses. */
        SI_VS_BLIT_SGPRS_POS = 3,
        SI_VS_BLIT_SGPRS_POS_COLOR = 7,
        SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9,
 };
 
 /* For VS shader key fix_fetch. */
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 6cc503690da..f187a06854d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -166,20 +166,21 @@ struct si_shader_context {
        int param_es2gs_offset;
        /* API GS */
        int param_gs2vs_offset;
        int param_gs_wave_id; /* GFX6 */
        LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */
        int param_gs_vtx01_offset; /* in dwords (GFX9) */
        int param_gs_vtx23_offset; /* in dwords (GFX9) */
        int param_gs_vtx45_offset; /* in dwords (GFX9) */
        /* CS */
        int param_block_size;
+       int param_cs_user_data;
 
        struct ac_llvm_compiler *compiler;
 
        /* Preloaded descriptors. */
        LLVMValueRef esgs_ring;
        LLVMValueRef gsvs_ring[4];
        LLVMValueRef tess_offchip_ring;
 
        LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
        LLVMValueRef gs_next_vertex[4];
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to