Re: [Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory
Reviewed-by: Nicolai Hähnle On 18.02.2016 16:10, Marek Olšák wrote: A new version of the patch is attached. Please review. Marek On Tue, Feb 16, 2016 at 6:02 PM, Nicolai Hähnle wrote: On 15.02.2016 18:59, Marek Olšák wrote: From: Marek Olšák --- src/gallium/drivers/radeonsi/si_pipe.c | 5 +- src/gallium/drivers/radeonsi/si_pipe.h | 16 ++ src/gallium/drivers/radeonsi/si_shader.h| 4 +- src/gallium/drivers/radeonsi/si_state.h | 2 + src/gallium/drivers/radeonsi/si_state_shaders.c | 234 +++- 5 files changed, 254 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 75d4775..a576237 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen) } } pipe_mutex_destroy(sscreen->shader_parts_mutex); - + si_destroy_shader_cache(sscreen); r600_destroy_common_screen(&sscreen->b); } @@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_init_gs_info(sscreen)) { + !si_init_gs_info(sscreen) || + !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 1ac7bc4..ef860a5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -80,6 +80,7 @@ #define SI_MAX_BORDER_COLORS 4096 struct si_compute; +struct hash_table; struct si_screen { struct r600_common_screen b; @@ -94,6 +95,21 @@ struct si_screen { struct si_shader_part *tcs_epilogs; struct si_shader_part *ps_prologs; struct si_shader_part *ps_epilogs; + + /* Shader cache in memory. +* +* Design & limitations: +* - The shader cache is per screen (= per process), never saved to +* disk, and skips redundant shader compilations from TGSI to bytecode. 
+* - It can only be used with one-variant-per-shader support, in which +* case only the main (typically middle) part of shaders is cached. +* - Only VS, TCS, TES, PS are cached, out of which only the hw VS +* variants of VS and TES are cached, so LS and ES aren't. +* - GS and CS aren't cached, but it's certainly possible to cache +* those as well. +*/ + pipe_mutex shader_cache_mutex; + struct hash_table *shader_cache; }; struct si_blend_color { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 48e048d..7e46871 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -362,8 +362,10 @@ struct si_shader { struct r600_resource *bo; struct r600_resource *scratch_bo; union si_shader_key key; - struct radeon_shader_binary binary; bool is_binary_shared; + + /* The following data is all that's needed for binary shaders. */ + struct radeon_shader_binary binary; struct si_shader_config config; struct si_shader_info info; }; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f64c4d4..40792cb 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx, /* si_state_shader.c */ bool si_update_shaders(struct si_context *sctx); void si_init_shader_functions(struct si_context *sctx); +bool si_init_shader_cache(struct si_screen *sscreen); +void si_destroy_shader_cache(struct si_screen *sscreen); /* si_state_draw.c */ void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c62cbb7..bc3e5be 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -32,10 +32,217 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" +#include 
"util/hash_table.h" +#include "util/u_hash.h" #include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_simple_shaders.h" +/* SHADER_CACHE */ + +/** + * Return the TGSI binary in a buffer. The first 4 bytes contain its size as + * integer. + */ +static void *si_get_tgsi_binary(struct si_shader_selector *sel) +{ + unsigned
Re: [Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory
A new version of the patch is attached. Please review. Marek On Tue, Feb 16, 2016 at 6:02 PM, Nicolai Hähnle wrote: > On 15.02.2016 18:59, Marek Olšák wrote: >> >> From: Marek Olšák >> >> --- >> src/gallium/drivers/radeonsi/si_pipe.c | 5 +- >> src/gallium/drivers/radeonsi/si_pipe.h | 16 ++ >> src/gallium/drivers/radeonsi/si_shader.h| 4 +- >> src/gallium/drivers/radeonsi/si_state.h | 2 + >> src/gallium/drivers/radeonsi/si_state_shaders.c | 234 >> +++- >> 5 files changed, 254 insertions(+), 7 deletions(-) >> >> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c >> b/src/gallium/drivers/radeonsi/si_pipe.c >> index 75d4775..a576237 100644 >> --- a/src/gallium/drivers/radeonsi/si_pipe.c >> +++ b/src/gallium/drivers/radeonsi/si_pipe.c >> @@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen* >> pscreen) >> } >> } >> pipe_mutex_destroy(sscreen->shader_parts_mutex); >> - >> + si_destroy_shader_cache(sscreen); >> r600_destroy_common_screen(&sscreen->b); >> } >> >> @@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct >> radeon_winsys *ws) >> sscreen->b.b.resource_create = r600_resource_create_common; >> >> if (!r600_common_screen_init(&sscreen->b, ws) || >> - !si_init_gs_info(sscreen)) { >> + !si_init_gs_info(sscreen) || >> + !si_init_shader_cache(sscreen)) { >> FREE(sscreen); >> return NULL; >> } >> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h >> b/src/gallium/drivers/radeonsi/si_pipe.h >> index 1ac7bc4..ef860a5 100644 >> --- a/src/gallium/drivers/radeonsi/si_pipe.h >> +++ b/src/gallium/drivers/radeonsi/si_pipe.h >> @@ -80,6 +80,7 @@ >> #define SI_MAX_BORDER_COLORS 4096 >> >> struct si_compute; >> +struct hash_table; >> >> struct si_screen { >> struct r600_common_screen b; >> @@ -94,6 +95,21 @@ struct si_screen { >> struct si_shader_part *tcs_epilogs; >> struct si_shader_part *ps_prologs; >> struct si_shader_part *ps_epilogs; >> + >> + /* Shader cache in memory. 
>> +* >> +* Design & limitations: >> +* - The shader cache is per screen (= per process), never saved >> to >> +* disk, and skips redundant shader compilations from TGSI to >> bytecode. >> +* - It can only be used with one-variant-per-shader support, in >> which >> +* case only the main (typically middle) part of shaders is >> cached. >> +* - Only VS, TCS, TES, PS are cached, out of which only the hw VS >> +* variants of VS and TES are cached, so LS and ES aren't. >> +* - GS and CS aren't cached, but it's certainly possible to cache >> +* those as well. >> +*/ >> + pipe_mutex shader_cache_mutex; >> + struct hash_table *shader_cache; >> }; >> >> struct si_blend_color { >> diff --git a/src/gallium/drivers/radeonsi/si_shader.h >> b/src/gallium/drivers/radeonsi/si_shader.h >> index 48e048d..7e46871 100644 >> --- a/src/gallium/drivers/radeonsi/si_shader.h >> +++ b/src/gallium/drivers/radeonsi/si_shader.h >> @@ -362,8 +362,10 @@ struct si_shader { >> struct r600_resource *bo; >> struct r600_resource *scratch_bo; >> union si_shader_key key; >> - struct radeon_shader_binary binary; >> bool is_binary_shared; >> + >> + /* The following data is all that's needed for binary shaders. 
*/ >> + struct radeon_shader_binary binary; >> struct si_shader_config config; >> struct si_shader_info info; >> }; >> diff --git a/src/gallium/drivers/radeonsi/si_state.h >> b/src/gallium/drivers/radeonsi/si_state.h >> index f64c4d4..40792cb 100644 >> --- a/src/gallium/drivers/radeonsi/si_state.h >> +++ b/src/gallium/drivers/radeonsi/si_state.h >> @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context >> *ctx, >> /* si_state_shader.c */ >> bool si_update_shaders(struct si_context *sctx); >> void si_init_shader_functions(struct si_context *sctx); >> +bool si_init_shader_cache(struct si_screen *sscreen); >> +void si_destroy_shader_cache(struct si_screen *sscreen); >> >> /* si_state_draw.c */ >> void si_emit_cache_flush(struct si_context *sctx, struct r600_atom >> *atom); >> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c >> b/src/gallium/drivers/radeonsi/si_state_shaders.c >> index c62cbb7..bc3e5be 100644 >> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c >> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c >> @@ -32,10 +32,217 @@ >> >> #include "tgsi/tgsi_parse.h" >> #include "tgsi/tgsi_ureg.h" >> +#include "util/hash_table.h" >> +#include "util/u_hash.h" >> #include "util/u_memory.h" >> #include "util/u_prim.h" >
Re: [Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory
On 15.02.2016 18:59, Marek Olšák wrote: From: Marek Olšák --- src/gallium/drivers/radeonsi/si_pipe.c | 5 +- src/gallium/drivers/radeonsi/si_pipe.h | 16 ++ src/gallium/drivers/radeonsi/si_shader.h| 4 +- src/gallium/drivers/radeonsi/si_state.h | 2 + src/gallium/drivers/radeonsi/si_state_shaders.c | 234 +++- 5 files changed, 254 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 75d4775..a576237 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen) } } pipe_mutex_destroy(sscreen->shader_parts_mutex); - + si_destroy_shader_cache(sscreen); r600_destroy_common_screen(&sscreen->b); } @@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_init_gs_info(sscreen)) { + !si_init_gs_info(sscreen) || + !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 1ac7bc4..ef860a5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -80,6 +80,7 @@ #define SI_MAX_BORDER_COLORS 4096 struct si_compute; +struct hash_table; struct si_screen { struct r600_common_screen b; @@ -94,6 +95,21 @@ struct si_screen { struct si_shader_part *tcs_epilogs; struct si_shader_part *ps_prologs; struct si_shader_part *ps_epilogs; + + /* Shader cache in memory. +* +* Design & limitations: +* - The shader cache is per screen (= per process), never saved to +* disk, and skips redundant shader compilations from TGSI to bytecode. +* - It can only be used with one-variant-per-shader support, in which +* case only the main (typically middle) part of shaders is cached. 
+* - Only VS, TCS, TES, PS are cached, out of which only the hw VS +* variants of VS and TES are cached, so LS and ES aren't. +* - GS and CS aren't cached, but it's certainly possible to cache +* those as well. +*/ + pipe_mutex shader_cache_mutex; + struct hash_table *shader_cache; }; struct si_blend_color { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 48e048d..7e46871 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -362,8 +362,10 @@ struct si_shader { struct r600_resource *bo; struct r600_resource *scratch_bo; union si_shader_key key; - struct radeon_shader_binary binary; bool is_binary_shared; + + /* The following data is all that's needed for binary shaders. */ + struct radeon_shader_binary binary; struct si_shader_config config; struct si_shader_info info; }; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f64c4d4..40792cb 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx, /* si_state_shader.c */ bool si_update_shaders(struct si_context *sctx); void si_init_shader_functions(struct si_context *sctx); +bool si_init_shader_cache(struct si_screen *sscreen); +void si_destroy_shader_cache(struct si_screen *sscreen); /* si_state_draw.c */ void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c62cbb7..bc3e5be 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -32,10 +32,217 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" +#include "util/hash_table.h" +#include "util/u_hash.h" #include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_simple_shaders.h" +/* 
SHADER_CACHE */ + +/** + * Return the TGSI binary in a buffer. The first 4 bytes contain its size as + * integer. + */ +static void *si_get_tgsi_binary(struct si_shader_selector *sel) +{ + unsigned tgsi_size = tgsi_num_tokens(sel->tokens) * +sizeof(struct tgsi_token); + unsigned size = 4 + tgsi_size + sizeof(sel->so); + char *result = (char*)MALLOC(size); + + if (!result) +
[Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory
From: Marek Olšák --- src/gallium/drivers/radeonsi/si_pipe.c | 5 +- src/gallium/drivers/radeonsi/si_pipe.h | 16 ++ src/gallium/drivers/radeonsi/si_shader.h| 4 +- src/gallium/drivers/radeonsi/si_state.h | 2 + src/gallium/drivers/radeonsi/si_state_shaders.c | 234 +++- 5 files changed, 254 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 75d4775..a576237 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen) } } pipe_mutex_destroy(sscreen->shader_parts_mutex); - + si_destroy_shader_cache(sscreen); r600_destroy_common_screen(&sscreen->b); } @@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_init_gs_info(sscreen)) { + !si_init_gs_info(sscreen) || + !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 1ac7bc4..ef860a5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -80,6 +80,7 @@ #define SI_MAX_BORDER_COLORS 4096 struct si_compute; +struct hash_table; struct si_screen { struct r600_common_screen b; @@ -94,6 +95,21 @@ struct si_screen { struct si_shader_part *tcs_epilogs; struct si_shader_part *ps_prologs; struct si_shader_part *ps_epilogs; + + /* Shader cache in memory. +* +* Design & limitations: +* - The shader cache is per screen (= per process), never saved to +* disk, and skips redundant shader compilations from TGSI to bytecode. +* - It can only be used with one-variant-per-shader support, in which +* case only the main (typically middle) part of shaders is cached. 
+* - Only VS, TCS, TES, PS are cached, out of which only the hw VS +* variants of VS and TES are cached, so LS and ES aren't. +* - GS and CS aren't cached, but it's certainly possible to cache +* those as well. +*/ + pipe_mutex shader_cache_mutex; + struct hash_table *shader_cache; }; struct si_blend_color { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 48e048d..7e46871 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -362,8 +362,10 @@ struct si_shader { struct r600_resource *bo; struct r600_resource *scratch_bo; union si_shader_key key; - struct radeon_shader_binary binary; bool is_binary_shared; + + /* The following data is all that's needed for binary shaders. */ + struct radeon_shader_binary binary; struct si_shader_config config; struct si_shader_info info; }; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f64c4d4..40792cb 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx, /* si_state_shader.c */ bool si_update_shaders(struct si_context *sctx); void si_init_shader_functions(struct si_context *sctx); +bool si_init_shader_cache(struct si_screen *sscreen); +void si_destroy_shader_cache(struct si_screen *sscreen); /* si_state_draw.c */ void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c62cbb7..bc3e5be 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -32,10 +32,217 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" +#include "util/hash_table.h" +#include "util/u_hash.h" #include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_simple_shaders.h" +/* 
SHADER_CACHE */ + +/** + * Return the TGSI binary in a buffer. The first 4 bytes contain its size as + * integer. + */ +static void *si_get_tgsi_binary(struct si_shader_selector *sel) +{ + unsigned tgsi_size = tgsi_num_tokens(sel->tokens) * +sizeof(struct tgsi_token); + unsigned size = 4 + tgsi_size + sizeof(sel->so); + char *result = (char*)MALLOC(size); + + if (!result) + return NULL; + + *((uint32_t*)result)