Re: [Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory

2016-02-18 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 18.02.2016 16:10, Marek Olšák wrote:

A new version of the patch is attached. Please review.

Marek

On Tue, Feb 16, 2016 at 6:02 PM, Nicolai Hähnle  wrote:

On 15.02.2016 18:59, Marek Olšák wrote:


From: Marek Olšák 

---
   src/gallium/drivers/radeonsi/si_pipe.c  |   5 +-
   src/gallium/drivers/radeonsi/si_pipe.h  |  16 ++
   src/gallium/drivers/radeonsi/si_shader.h|   4 +-
   src/gallium/drivers/radeonsi/si_state.h |   2 +
   src/gallium/drivers/radeonsi/si_state_shaders.c | 234
+++-
   5 files changed, 254 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
b/src/gallium/drivers/radeonsi/si_pipe.c
index 75d4775..a576237 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen*
pscreen)
 }
 }
 pipe_mutex_destroy(sscreen->shader_parts_mutex);
-
+   si_destroy_shader_cache(sscreen);
 r600_destroy_common_screen(&sscreen->b);
   }

@@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct
radeon_winsys *ws)
 sscreen->b.b.resource_create = r600_resource_create_common;

 if (!r600_common_screen_init(&sscreen->b, ws) ||
-   !si_init_gs_info(sscreen)) {
+   !si_init_gs_info(sscreen) ||
+   !si_init_shader_cache(sscreen)) {
 FREE(sscreen);
 return NULL;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
b/src/gallium/drivers/radeonsi/si_pipe.h
index 1ac7bc4..ef860a5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -80,6 +80,7 @@
   #define SI_MAX_BORDER_COLORS  4096

   struct si_compute;
+struct hash_table;

   struct si_screen {
 struct r600_common_screen   b;
@@ -94,6 +95,21 @@ struct si_screen {
 struct si_shader_part   *tcs_epilogs;
 struct si_shader_part   *ps_prologs;
 struct si_shader_part   *ps_epilogs;
+
+   /* Shader cache in memory.
+*
+* Design & limitations:
+* - The shader cache is per screen (= per process), never saved
to
+*   disk, and skips redundant shader compilations from TGSI to
bytecode.
+* - It can only be used with one-variant-per-shader support, in
which
+*   case only the main (typically middle) part of shaders is
cached.
+* - Only VS, TCS, TES, PS are cached, out of which only the hw VS
+*   variants of VS and TES are cached, so LS and ES aren't.
+* - GS and CS aren't cached, but it's certainly possible to cache
+*   those as well.
+*/
+   pipe_mutex  shader_cache_mutex;
+   struct hash_table   *shader_cache;
   };

   struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h
b/src/gallium/drivers/radeonsi/si_shader.h
index 48e048d..7e46871 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -362,8 +362,10 @@ struct si_shader {
 struct r600_resource*bo;
 struct r600_resource*scratch_bo;
 union si_shader_key key;
-   struct radeon_shader_binary binary;
 boolis_binary_shared;
+
+   /* The following data is all that's needed for binary shaders. */
+   struct radeon_shader_binary binary;
 struct si_shader_config config;
 struct si_shader_info   info;
   };
diff --git a/src/gallium/drivers/radeonsi/si_state.h
b/src/gallium/drivers/radeonsi/si_state.h
index f64c4d4..40792cb 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context
*ctx,
   /* si_state_shader.c */
   bool si_update_shaders(struct si_context *sctx);
   void si_init_shader_functions(struct si_context *sctx);
+bool si_init_shader_cache(struct si_screen *sscreen);
+void si_destroy_shader_cache(struct si_screen *sscreen);

   /* si_state_draw.c */
   void si_emit_cache_flush(struct si_context *sctx, struct r600_atom
*atom);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c62cbb7..bc3e5be 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -32,10 +32,217 @@

   #include "tgsi/tgsi_parse.h"
   #include "tgsi/tgsi_ureg.h"
+#include "util/hash_table.h"
+#include "util/u_hash.h"
   #include "util/u_memory.h"
   #include "util/u_prim.h"
   #include "util/u_simple_shaders.h"

+/* SHADER_CACHE */
+
+/**
+ * Return the TGSI binary in a buffer. The first 4 bytes contain its size
as
+ * integer.
+ */
+static void 

Re: [Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory

2016-02-18 Thread Marek Olšák
A new version of the patch is attached. Please review.

Marek

On Tue, Feb 16, 2016 at 6:02 PM, Nicolai Hähnle  wrote:
> On 15.02.2016 18:59, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>   src/gallium/drivers/radeonsi/si_pipe.c  |   5 +-
>>   src/gallium/drivers/radeonsi/si_pipe.h  |  16 ++
>>   src/gallium/drivers/radeonsi/si_shader.h|   4 +-
>>   src/gallium/drivers/radeonsi/si_state.h |   2 +
>>   src/gallium/drivers/radeonsi/si_state_shaders.c | 234
>> +++-
>>   5 files changed, 254 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>> b/src/gallium/drivers/radeonsi/si_pipe.c
>> index 75d4775..a576237 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> @@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen*
>> pscreen)
>> }
>> }
>> pipe_mutex_destroy(sscreen->shader_parts_mutex);
>> -
>> +   si_destroy_shader_cache(sscreen);
>> r600_destroy_common_screen(&sscreen->b);
>>   }
>>
>> @@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct
>> radeon_winsys *ws)
>> sscreen->b.b.resource_create = r600_resource_create_common;
>>
>> if (!r600_common_screen_init(&sscreen->b, ws) ||
>> -   !si_init_gs_info(sscreen)) {
>> +   !si_init_gs_info(sscreen) ||
>> +   !si_init_shader_cache(sscreen)) {
>> FREE(sscreen);
>> return NULL;
>> }
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>> b/src/gallium/drivers/radeonsi/si_pipe.h
>> index 1ac7bc4..ef860a5 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -80,6 +80,7 @@
>>   #define SI_MAX_BORDER_COLORS  4096
>>
>>   struct si_compute;
>> +struct hash_table;
>>
>>   struct si_screen {
>> struct r600_common_screen   b;
>> @@ -94,6 +95,21 @@ struct si_screen {
>> struct si_shader_part   *tcs_epilogs;
>> struct si_shader_part   *ps_prologs;
>> struct si_shader_part   *ps_epilogs;
>> +
>> +   /* Shader cache in memory.
>> +*
>> +* Design & limitations:
>> +* - The shader cache is per screen (= per process), never saved
>> to
>> +*   disk, and skips redundant shader compilations from TGSI to
>> bytecode.
>> +* - It can only be used with one-variant-per-shader support, in
>> which
>> +*   case only the main (typically middle) part of shaders is
>> cached.
>> +* - Only VS, TCS, TES, PS are cached, out of which only the hw VS
>> +*   variants of VS and TES are cached, so LS and ES aren't.
>> +* - GS and CS aren't cached, but it's certainly possible to cache
>> +*   those as well.
>> +*/
>> +   pipe_mutex  shader_cache_mutex;
>> +   struct hash_table   *shader_cache;
>>   };
>>
>>   struct si_blend_color {
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h
>> b/src/gallium/drivers/radeonsi/si_shader.h
>> index 48e048d..7e46871 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>> @@ -362,8 +362,10 @@ struct si_shader {
>> struct r600_resource*bo;
>> struct r600_resource*scratch_bo;
>> union si_shader_key key;
>> -   struct radeon_shader_binary binary;
>> boolis_binary_shared;
>> +
>> +   /* The following data is all that's needed for binary shaders. */
>> +   struct radeon_shader_binary binary;
>> struct si_shader_config config;
>> struct si_shader_info   info;
>>   };
>> diff --git a/src/gallium/drivers/radeonsi/si_state.h
>> b/src/gallium/drivers/radeonsi/si_state.h
>> index f64c4d4..40792cb 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.h
>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>> @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context
>> *ctx,
>>   /* si_state_shader.c */
>>   bool si_update_shaders(struct si_context *sctx);
>>   void si_init_shader_functions(struct si_context *sctx);
>> +bool si_init_shader_cache(struct si_screen *sscreen);
>> +void si_destroy_shader_cache(struct si_screen *sscreen);
>>
>>   /* si_state_draw.c */
>>   void si_emit_cache_flush(struct si_context *sctx, struct r600_atom
>> *atom);
>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>> index c62cbb7..bc3e5be 100644
>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>> @@ -32,10 +32,217 @@
>>
>>   #include "tgsi/tgsi_parse.h"
>>   #include "tgsi/tgsi_ureg.h"
>> +#include "util/hash_table.h"
>> +#include "util/u_hash.h"
>>   #include "util/u_memory.h"
>>   

Re: [Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory

2016-02-16 Thread Nicolai Hähnle

On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_pipe.c  |   5 +-
  src/gallium/drivers/radeonsi/si_pipe.h  |  16 ++
  src/gallium/drivers/radeonsi/si_shader.h|   4 +-
  src/gallium/drivers/radeonsi/si_state.h |   2 +
  src/gallium/drivers/radeonsi/si_state_shaders.c | 234 +++-
  5 files changed, 254 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 75d4775..a576237 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
}
}
pipe_mutex_destroy(sscreen->shader_parts_mutex);
-
+   si_destroy_shader_cache(sscreen);
r600_destroy_common_screen(&sscreen->b);
  }

@@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;

if (!r600_common_screen_init(&sscreen->b, ws) ||
-   !si_init_gs_info(sscreen)) {
+   !si_init_gs_info(sscreen) ||
+   !si_init_shader_cache(sscreen)) {
FREE(sscreen);
return NULL;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 1ac7bc4..ef860a5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -80,6 +80,7 @@
  #define SI_MAX_BORDER_COLORS  4096

  struct si_compute;
+struct hash_table;

  struct si_screen {
struct r600_common_screen   b;
@@ -94,6 +95,21 @@ struct si_screen {
struct si_shader_part   *tcs_epilogs;
struct si_shader_part   *ps_prologs;
struct si_shader_part   *ps_epilogs;
+
+   /* Shader cache in memory.
+*
+* Design & limitations:
+* - The shader cache is per screen (= per process), never saved to
+*   disk, and skips redundant shader compilations from TGSI to 
bytecode.
+* - It can only be used with one-variant-per-shader support, in which
+*   case only the main (typically middle) part of shaders is cached.
+* - Only VS, TCS, TES, PS are cached, out of which only the hw VS
+*   variants of VS and TES are cached, so LS and ES aren't.
+* - GS and CS aren't cached, but it's certainly possible to cache
+*   those as well.
+*/
+   pipe_mutex  shader_cache_mutex;
+   struct hash_table   *shader_cache;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 48e048d..7e46871 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -362,8 +362,10 @@ struct si_shader {
struct r600_resource*bo;
struct r600_resource*scratch_bo;
union si_shader_key key;
-   struct radeon_shader_binary binary;
boolis_binary_shared;
+
+   /* The following data is all that's needed for binary shaders. */
+   struct radeon_shader_binary binary;
struct si_shader_config config;
struct si_shader_info   info;
  };
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index f64c4d4..40792cb 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
  /* si_state_shader.c */
  bool si_update_shaders(struct si_context *sctx);
  void si_init_shader_functions(struct si_context *sctx);
+bool si_init_shader_cache(struct si_screen *sscreen);
+void si_destroy_shader_cache(struct si_screen *sscreen);

  /* si_state_draw.c */
  void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c62cbb7..bc3e5be 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -32,10 +32,217 @@

  #include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_ureg.h"
+#include "util/hash_table.h"
+#include "util/u_hash.h"
  #include "util/u_memory.h"
  #include "util/u_prim.h"
  #include "util/u_simple_shaders.h"

+/* SHADER_CACHE */
+
+/**
+ * Return the TGSI binary in a buffer. The first 4 bytes contain its size as
+ * integer.
+ */
+static void *si_get_tgsi_binary(struct si_shader_selector *sel)
+{
+   unsigned tgsi_size = tgsi_num_tokens(sel->tokens) *
+sizeof(struct tgsi_token);
+   unsigned size = 4 + tgsi_size + sizeof(sel->so);
+   char *result = (char*)MALLOC(size);
+
+   if (!result)