From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_compute.c | 16 +++++++++++++--- src/gallium/drivers/radeonsi/si_compute.h | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_shader.c | 11 +++++++++++ src/gallium/drivers/radeonsi/si_shader.h | 7 +++++++ .../drivers/radeonsi/si_shader_internal.h | 1 + 6 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index ea6fa3e999d..c5d3d5fcf02 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -121,20 +121,22 @@ static void si_create_compute_state_async(void *job, int thread_index) &program->active_samplers_and_images); program->shader.selector = &sel; program->shader.is_monolithic = true; program->uses_grid_size = sel.info.uses_grid_size; program->uses_bindless_samplers = sel.info.uses_bindless_samplers; program->uses_bindless_images = sel.info.uses_bindless_images; program->reads_variable_block_size = sel.info.uses_block_size && sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; + program->num_cs_user_data_dwords = + sel.info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS]; void *ir_binary = si_get_ir_binary(&sel); /* Try to load the shader from the shader cache. */ mtx_lock(&sscreen->shader_cache_mutex); if (ir_binary && si_shader_cache_load_shader(sscreen, ir_binary, shader)) { mtx_unlock(&sscreen->shader_cache_mutex); @@ -152,21 +154,22 @@ static void si_create_compute_state_async(void *job, int thread_index) if (program->ir_type == PIPE_SHADER_IR_TGSI) FREE(program->ir.tgsi); program->shader.selector = NULL; return; } bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0; unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS + (sel.info.uses_grid_size ? 3 : 0) + - (program->reads_variable_block_size ? 3 : 0); + (program->reads_variable_block_size ? 3 : 0) + + program->num_cs_user_data_dwords; shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(shader->config.float_mode); shader->config.rsrc2 = S_00B84C_USER_SGPR(user_sgprs) | S_00B84C_SCRATCH_EN(scratch_enabled) | @@ -699,30 +702,32 @@ static bool si_upload_compute_input(struct si_context *sctx, radeon_emit(cs, kernel_args_va); radeon_emit(cs, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0)); } r600_resource_reference(&input_buffer, NULL); return true; } -static void si_setup_tgsi_grid(struct si_context *sctx, +static void si_setup_tgsi_user_data(struct si_context *sctx, const struct pipe_grid_info *info) { struct si_compute *program = sctx->cs_shader_state.program; struct radeon_cmdbuf *cs = sctx->gfx_cs; unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 + 4 * SI_NUM_RESOURCE_SGPRS; unsigned block_size_reg = grid_size_reg + /* 12 bytes = 3 dwords. */ 12 * program->uses_grid_size; + unsigned cs_user_data_reg = block_size_reg + + 12 * program->reads_variable_block_size; if (info->indirect) { if (program->uses_grid_size) { uint64_t base_va = r600_resource(info->indirect)->gpu_address; uint64_t va = base_va + info->indirect_offset; int i; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, r600_resource(info->indirect), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); @@ -744,20 +749,25 @@ static void si_setup_tgsi_grid(struct si_context *sctx, radeon_emit(cs, info->grid[1]); radeon_emit(cs, info->grid[2]); } if (program->reads_variable_block_size) { radeon_set_sh_reg_seq(cs, block_size_reg, 3); radeon_emit(cs, info->block[0]); radeon_emit(cs, info->block[1]); radeon_emit(cs, info->block[2]); } } + + if (program->num_cs_user_data_dwords) { + radeon_set_sh_reg_seq(cs, cs_user_data_reg, program->num_cs_user_data_dwords); + radeon_emit_array(cs, sctx->cs_user_data, program->num_cs_user_data_dwords); + } } static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_grid_info *info) { struct si_screen *sscreen = sctx->screen; struct radeon_cmdbuf *cs = sctx->gfx_cs; bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off; unsigned waves_per_threadgroup = DIV_ROUND_UP(info->block[0] * info->block[1] * info->block[2], 64); @@ -901,21 +911,21 @@ static void si_launch_grid( r600_resource(program->global_buffers[i]); if (!buffer) { continue; } radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_COMPUTE_GLOBAL); } if (program->ir_type != PIPE_SHADER_IR_NATIVE) - si_setup_tgsi_grid(sctx, info); + si_setup_tgsi_user_data(sctx, info); si_emit_dispatch_packets(sctx, info); if (unlikely(sctx->current_saved_cs)) { si_trace_emit(sctx); si_log_compute_state(sctx, sctx->log); } sctx->compute_is_busy = true; sctx->num_compute_calls++; diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h index ef8b4aec4df..99b501673c5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.h +++ b/src/gallium/drivers/radeonsi/si_compute.h @@ -50,20 +50,21 @@ struct si_compute { unsigned private_size; unsigned input_size; struct si_shader shader; struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; unsigned use_code_object_v2 : 1; unsigned uses_grid_size:1; unsigned uses_bindless_samplers:1; unsigned uses_bindless_images:1; bool reads_variable_block_size; + unsigned num_cs_user_data_dwords; }; void si_destroy_compute(struct si_compute *program); static inline void si_compute_reference(struct si_compute **dst, struct si_compute *src) { if (pipe_reference(&(*dst)->reference, &src->reference)) si_destroy_compute(*dst); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 4c3f13b84e2..100d0166f62 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -847,20 +847,21 @@ struct si_context { struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; union pipe_color_union *border_color_table; /* in CPU memory, any endian */ struct r600_resource *border_color_buffer; union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ unsigned border_color_count; unsigned num_vs_blit_sgprs; uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD]; + uint32_t cs_user_data[4]; /* Vertex and index buffers. */ bool vertex_buffers_dirty; bool vertex_buffer_pointer_dirty; struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS]; /* MSAA config state. */ int ps_iter_samples; bool ps_uses_fbfetch; bool smoothing_enabled; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index cfd99b61601..c51e91b1d3d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2260,20 +2260,24 @@ void si_load_system_value(struct si_shader_context *ctx, } id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); value = LLVMBuildShl(ctx->ac.builder, value, id, ""); if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK || decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK) value = LLVMBuildNot(ctx->ac.builder, value, ""); value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, ""); break; } + case TGSI_SEMANTIC_CS_USER_DATA: + value = LLVMGetParam(ctx->main_fn, ctx->param_cs_user_data); + break; + default: assert(!"unknown system value"); return; } ctx->system_values[index] = value; } void si_declare_compute_memory(struct si_shader_context *ctx) { @@ -4941,20 +4945,27 @@ static void create_function(struct si_shader_context *ctx) case PIPE_SHADER_COMPUTE: declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, true); if (shader->selector->info.uses_grid_size) add_arg_assign(&fninfo, ARG_SGPR, v3i32, &ctx->abi.num_work_groups); if (shader->selector->info.uses_block_size && shader->selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0) ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32); + unsigned cs_user_data_dwords = + shader->selector->info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS]; + if (cs_user_data_dwords) { + ctx->param_cs_user_data = add_arg(&fninfo, ARG_SGPR, + LLVMVectorType(ctx->i32, cs_user_data_dwords)); + } + for (i = 0; i < 3; i++) { ctx->abi.workgroup_ids[i] = NULL; if (shader->selector->info.uses_block_id[i]) add_arg_assign(&fninfo, ARG_SGPR, ctx->i32, &ctx->abi.workgroup_ids[i]); } add_arg_assign(&fninfo, ARG_VGPR, v3i32, &ctx->abi.local_invocation_ids); break; default: assert(0 && "unimplemented shader"); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 2dc4bc7e787..14230b82073 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -269,28 +269,35 @@ enum { #define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE #define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1) #define C_VS_STATE_INDEXED 0xFFFFFFFD #define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8) #define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24) #define C_VS_STATE_LS_OUT_VERTEX_SIZE 0x00FFFFFF /* SI-specific system values. */ enum { + /* Values from set_tess_state. */ TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT, TGSI_SEMANTIC_DEFAULT_TESSINNER_SI, + + /* Up to 4 dwords in user SGPRs for compute shaders. */ + TGSI_SEMANTIC_CS_USER_DATA, }; enum { /* Use a property enum that CS wouldn't use. */ TGSI_PROPERTY_CS_LOCAL_SIZE = TGSI_PROPERTY_FS_COORD_ORIGIN, + /* The number of used user data dwords in the range [1, 4]. */ + TGSI_PROPERTY_CS_USER_DATA_DWORDS = TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, + /* Use a property enum that VS wouldn't use. */ TGSI_PROPERTY_VS_BLIT_SGPRS = TGSI_PROPERTY_FS_COORD_ORIGIN, /* These represent the number of SGPRs the shader uses. */ SI_VS_BLIT_SGPRS_POS = 3, SI_VS_BLIT_SGPRS_POS_COLOR = 7, SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9, }; /* For VS shader key fix_fetch. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 6cc503690da..f187a06854d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -166,20 +166,21 @@ struct si_shader_context { int param_es2gs_offset; /* API GS */ int param_gs2vs_offset; int param_gs_wave_id; /* GFX6 */ LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */ int param_gs_vtx01_offset; /* in dwords (GFX9) */ int param_gs_vtx23_offset; /* in dwords (GFX9) */ int param_gs_vtx45_offset; /* in dwords (GFX9) */ /* CS */ int param_block_size; + int param_cs_user_data; struct ac_llvm_compiler *compiler; /* Preloaded descriptors. */ LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; LLVMValueRef tess_offchip_ring; LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */ LLVMValueRef gs_next_vertex[4]; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev