Re: [Mesa-dev] [PATCH 12/25] radeonsi: add VS prolog

2016-02-16 Thread Nicolai Hähnle

On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

This is disabled with use_monolithic_shaders = true.
---
  src/gallium/drivers/radeonsi/si_pipe.c   |  19 +++
  src/gallium/drivers/radeonsi/si_pipe.h   |   3 +
  src/gallium/drivers/radeonsi/si_shader.c | 236 ++-
  src/gallium/drivers/radeonsi/si_shader.h |   9 ++
  4 files changed, 266 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 448fe88..7ce9570 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -22,6 +22,7 @@
   */

  #include "si_pipe.h"
+#include "si_shader.h"
  #include "si_public.h"
  #include "sid.h"

@@ -536,6 +537,11 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, enu
  static void si_destroy_screen(struct pipe_screen* pscreen)
  {
struct si_screen *sscreen = (struct si_screen *)pscreen;
+   struct si_shader_part *parts[] = {
+   sscreen->vs_prologs,
+   /* this will be filled with other shader parts */
+   };
+   unsigned i;

if (!sscreen)
return;
@@ -543,6 +549,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
if (!sscreen->b.ws->unref(sscreen->b.ws))
return;

+   /* Free shader parts. */
+   for (i = 0; i < ARRAY_SIZE(parts); i++) {
+   while (parts[i]) {
+   struct si_shader_part *part = parts[i];
+
+   parts[i] = part->next;
+   radeon_shader_binary_clean(&part->binary);
+   FREE(part);
+   }
+   }
+   pipe_mutex_destroy(sscreen->shader_parts_mutex);
+
r600_destroy_common_screen(&sscreen->b);
  }

@@ -600,6 +618,7 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)

sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
+   pipe_mutex_init(sscreen->shader_parts_mutex);
sscreen->use_monolithic_shaders = true;

if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 2a2455c..f4bafc2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -87,6 +87,9 @@ struct si_screen {

/* Whether shaders are monolithic (1-part) or separate (3-part). */
bool use_monolithic_shaders;
+
+   pipe_mutex  shader_parts_mutex;
+   struct si_shader_part   *vs_prologs;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b74ed1e..fbb8394 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -83,6 +83,7 @@ struct si_shader_context
int param_rel_auto_id;
int param_vs_prim_id;
int param_instance_id;
+   int param_vertex_index0;
int param_tes_u;
int param_tes_v;
int param_tes_rel_patch_id;
@@ -432,7 +433,11 @@ static void declare_input_vs(
/* Build the attribute offset */
attribute_offset = lp_build_const_int32(gallivm, 0);

-   if (divisor) {
+   if (!ctx->is_monolithic) {
+   buffer_index = LLVMGetParam(radeon_bld->main_fn,
+   ctx->param_vertex_index0 +
+   input_index);
+   } else if (divisor) {
/* Build index from instance ID, start instance and divisor */
ctx->shader->uses_instanceid = true;
buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
@@ -3711,6 +3716,15 @@ static void create_function(struct si_shader_context 
*ctx)
params[ctx->param_rel_auto_id = num_params++] = ctx->i32;
params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
params[ctx->param_instance_id = num_params++] = ctx->i32;
+
+   if (!ctx->is_monolithic &&
+   !ctx->is_gs_copy_shader) {
+   /* Vertex load indices. */
+   ctx->param_vertex_index0 = num_params;
+
+   for (i = 0; i < shader->selector->info.num_inputs; i++)
+   params[num_params++] = ctx->i32;
+   }
break;

case TGSI_PROCESSOR_TESS_CTRL:
@@ -4678,6 +4692,203 @@ out:
return r;
  }

+/**
+ * Create, compile and return a shader part (prolog or epilog).
+ *
+ * \param sscreen  screen
+ * \param list list of shader parts of the same category
+ * \param key  shader part key
+ * \param tm   LLVM target machine
+ * \param debug    debug callback
+ * \param compile  the callback responsible for compilation
+ * \return

[Mesa-dev] [PATCH 12/25] radeonsi: add VS prolog

2016-02-15 Thread Marek Olšák
From: Marek Olšák 

This is disabled with use_monolithic_shaders = true.
---
 src/gallium/drivers/radeonsi/si_pipe.c   |  19 +++
 src/gallium/drivers/radeonsi/si_pipe.h   |   3 +
 src/gallium/drivers/radeonsi/si_shader.c | 236 ++-
 src/gallium/drivers/radeonsi/si_shader.h |   9 ++
 4 files changed, 266 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 448fe88..7ce9570 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -22,6 +22,7 @@
  */
 
 #include "si_pipe.h"
+#include "si_shader.h"
 #include "si_public.h"
 #include "sid.h"
 
@@ -536,6 +537,11 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, enu
 static void si_destroy_screen(struct pipe_screen* pscreen)
 {
struct si_screen *sscreen = (struct si_screen *)pscreen;
+   struct si_shader_part *parts[] = {
+   sscreen->vs_prologs,
+   /* this will be filled with other shader parts */
+   };
+   unsigned i;
 
if (!sscreen)
return;
@@ -543,6 +549,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
if (!sscreen->b.ws->unref(sscreen->b.ws))
return;
 
+   /* Free shader parts. */
+   for (i = 0; i < ARRAY_SIZE(parts); i++) {
+   while (parts[i]) {
+   struct si_shader_part *part = parts[i];
+
+   parts[i] = part->next;
+   radeon_shader_binary_clean(&part->binary);
+   FREE(part);
+   }
+   }
+   pipe_mutex_destroy(sscreen->shader_parts_mutex);
+
r600_destroy_common_screen(&sscreen->b);
 }
 
@@ -600,6 +618,7 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
 
sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
+   pipe_mutex_init(sscreen->shader_parts_mutex);
sscreen->use_monolithic_shaders = true;
 
if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 2a2455c..f4bafc2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -87,6 +87,9 @@ struct si_screen {
 
/* Whether shaders are monolithic (1-part) or separate (3-part). */
bool use_monolithic_shaders;
+
+   pipe_mutex  shader_parts_mutex;
+   struct si_shader_part   *vs_prologs;
 };
 
 struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b74ed1e..fbb8394 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -83,6 +83,7 @@ struct si_shader_context
int param_rel_auto_id;
int param_vs_prim_id;
int param_instance_id;
+   int param_vertex_index0;
int param_tes_u;
int param_tes_v;
int param_tes_rel_patch_id;
@@ -432,7 +433,11 @@ static void declare_input_vs(
/* Build the attribute offset */
attribute_offset = lp_build_const_int32(gallivm, 0);
 
-   if (divisor) {
+   if (!ctx->is_monolithic) {
+   buffer_index = LLVMGetParam(radeon_bld->main_fn,
+   ctx->param_vertex_index0 +
+   input_index);
+   } else if (divisor) {
/* Build index from instance ID, start instance and divisor */
ctx->shader->uses_instanceid = true;
buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
@@ -3711,6 +3716,15 @@ static void create_function(struct si_shader_context 
*ctx)
params[ctx->param_rel_auto_id = num_params++] = ctx->i32;
params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
params[ctx->param_instance_id = num_params++] = ctx->i32;
+
+   if (!ctx->is_monolithic &&
+   !ctx->is_gs_copy_shader) {
+   /* Vertex load indices. */
+   ctx->param_vertex_index0 = num_params;
+
+   for (i = 0; i < shader->selector->info.num_inputs; i++)
+   params[num_params++] = ctx->i32;
+   }
break;
 
case TGSI_PROCESSOR_TESS_CTRL:
@@ -4678,6 +4692,203 @@ out:
return r;
 }
 
+/**
+ * Create, compile and return a shader part (prolog or epilog).
+ *
+ * \param sscreen  screen
+ * \param list list of shader parts of the same category
+ * \param key  shader part key
+ * \param tm   LLVM target machine
+ * \param debug    debug callback
+ * \param compile  the callback responsible for compilation
+ * \return non-NULL on success
+ */
+static