Re: [Mesa-dev] [PATCH 12/25] radeonsi: add VS prolog
On 15.02.2016 18:59, Marek Olšák wrote: From: Marek Olšák This is disabled with use_monolithic_shaders = true. --- src/gallium/drivers/radeonsi/si_pipe.c | 19 +++ src/gallium/drivers/radeonsi/si_pipe.h | 3 + src/gallium/drivers/radeonsi/si_shader.c | 236 ++- src/gallium/drivers/radeonsi/si_shader.h | 9 ++ 4 files changed, 266 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 448fe88..7ce9570 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -22,6 +22,7 @@ */ #include "si_pipe.h" +#include "si_shader.h" #include "si_public.h" #include "sid.h" @@ -536,6 +537,11 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu static void si_destroy_screen(struct pipe_screen* pscreen) { struct si_screen *sscreen = (struct si_screen *)pscreen; + struct si_shader_part *parts[] = { + sscreen->vs_prologs, + /* this will be filled with other shader parts */ + }; + unsigned i; if (!sscreen) return; @@ -543,6 +549,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen) if (!sscreen->b.ws->unref(sscreen->b.ws)) return; + /* Free shader parts. */ + for (i = 0; i < ARRAY_SIZE(parts); i++) { + while (parts[i]) { + struct si_shader_part *part = parts[i]; + + parts[i] = part->next; + radeon_shader_binary_clean(&part->binary); + FREE(part); + } + } + pipe_mutex_destroy(sscreen->shader_parts_mutex); + r600_destroy_common_screen(&sscreen->b); } @@ -600,6 +618,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; + pipe_mutex_init(sscreen->shader_parts_mutex); sscreen->use_monolithic_shaders = true; if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 2a2455c..f4bafc2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -87,6 +87,9 @@ struct si_screen { /* Whether shaders are monolithic (1-part) or separate (3-part). */ booluse_monolithic_shaders; + + pipe_mutex shader_parts_mutex; + struct si_shader_part *vs_prologs; }; struct si_blend_color { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b74ed1e..fbb8394 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -83,6 +83,7 @@ struct si_shader_context int param_rel_auto_id; int param_vs_prim_id; int param_instance_id; + int param_vertex_index0; int param_tes_u; int param_tes_v; int param_tes_rel_patch_id; @@ -432,7 +433,11 @@ static void declare_input_vs( /* Build the attribute offset */ attribute_offset = lp_build_const_int32(gallivm, 0); - if (divisor) { + if (!ctx->is_monolithic) { + buffer_index = LLVMGetParam(radeon_bld->main_fn, + ctx->param_vertex_index0 + + input_index); + } else if (divisor) { /* Build index from instance ID, start instance and divisor */ ctx->shader->uses_instanceid = true; buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, @@ -3711,6 +3716,15 @@ static void create_function(struct si_shader_context *ctx) params[ctx->param_rel_auto_id = num_params++] = ctx->i32; params[ctx->param_vs_prim_id = num_params++] = ctx->i32; params[ctx->param_instance_id = num_params++] = ctx->i32; + + if (!ctx->is_monolithic && + !ctx->is_gs_copy_shader) { + /* Vertex load indices. */ + ctx->param_vertex_index0 = num_params; + + for (i = 0; i < shader->selector->info.num_inputs; i++) + params[num_params++] = ctx->i32; + } break; case TGSI_PROCESSOR_TESS_CTRL: @@ -4678,6 +4692,203 @@ out: return r; } +/** + * Create, compile and return a shader part (prolog or epilog). + * + * \param sscreen screen + * \param list list of shader parts of the same category + * \param key shader part key + * \param tm LLVM target machine + * \param debugdebug callback + * \param compile the callback responsible for compilation + * \return
[Mesa-dev] [PATCH 12/25] radeonsi: add VS prolog
From: Marek Olšák This is disabled with use_monolithic_shaders = true. --- src/gallium/drivers/radeonsi/si_pipe.c | 19 +++ src/gallium/drivers/radeonsi/si_pipe.h | 3 + src/gallium/drivers/radeonsi/si_shader.c | 236 ++- src/gallium/drivers/radeonsi/si_shader.h | 9 ++ 4 files changed, 266 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 448fe88..7ce9570 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -22,6 +22,7 @@ */ #include "si_pipe.h" +#include "si_shader.h" #include "si_public.h" #include "sid.h" @@ -536,6 +537,11 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu static void si_destroy_screen(struct pipe_screen* pscreen) { struct si_screen *sscreen = (struct si_screen *)pscreen; + struct si_shader_part *parts[] = { + sscreen->vs_prologs, + /* this will be filled with other shader parts */ + }; + unsigned i; if (!sscreen) return; @@ -543,6 +549,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen) if (!sscreen->b.ws->unref(sscreen->b.ws)) return; + /* Free shader parts. */ + for (i = 0; i < ARRAY_SIZE(parts); i++) { + while (parts[i]) { + struct si_shader_part *part = parts[i]; + + parts[i] = part->next; + radeon_shader_binary_clean(&part->binary); + FREE(part); + } + } + pipe_mutex_destroy(sscreen->shader_parts_mutex); + r600_destroy_common_screen(&sscreen->b); } @@ -600,6 +618,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; + pipe_mutex_init(sscreen->shader_parts_mutex); sscreen->use_monolithic_shaders = true; if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 2a2455c..f4bafc2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -87,6 +87,9 @@ struct si_screen { /* Whether shaders are monolithic (1-part) or separate (3-part). */ booluse_monolithic_shaders; + + pipe_mutex shader_parts_mutex; + struct si_shader_part *vs_prologs; }; struct si_blend_color { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b74ed1e..fbb8394 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -83,6 +83,7 @@ struct si_shader_context int param_rel_auto_id; int param_vs_prim_id; int param_instance_id; + int param_vertex_index0; int param_tes_u; int param_tes_v; int param_tes_rel_patch_id; @@ -432,7 +433,11 @@ static void declare_input_vs( /* Build the attribute offset */ attribute_offset = lp_build_const_int32(gallivm, 0); - if (divisor) { + if (!ctx->is_monolithic) { + buffer_index = LLVMGetParam(radeon_bld->main_fn, + ctx->param_vertex_index0 + + input_index); + } else if (divisor) { /* Build index from instance ID, start instance and divisor */ ctx->shader->uses_instanceid = true; buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, @@ -3711,6 +3716,15 @@ static void create_function(struct si_shader_context *ctx) params[ctx->param_rel_auto_id = num_params++] = ctx->i32; params[ctx->param_vs_prim_id = num_params++] = ctx->i32; params[ctx->param_instance_id = num_params++] = ctx->i32; + + if (!ctx->is_monolithic && + !ctx->is_gs_copy_shader) { + /* Vertex load indices. */ + ctx->param_vertex_index0 = num_params; + + for (i = 0; i < shader->selector->info.num_inputs; i++) + params[num_params++] = ctx->i32; + } break; case TGSI_PROCESSOR_TESS_CTRL: @@ -4678,6 +4692,203 @@ out: return r; } +/** + * Create, compile and return a shader part (prolog or epilog). + * + * \param sscreen screen + * \param list list of shader parts of the same category + * \param key shader part key + * \param tm LLVM target machine + * \param debugdebug callback + * \param compile the callback responsible for compilation + * \return non-NULL on success + */ +static stru