Re: [Mesa-dev] [PATCH 11/27] i965: Store gather table information in the program data

2015-05-08 Thread Abdiel Janulgue


On 05/07/2015 06:17 PM, Pohjolainen, Topi wrote:
> On Tue, Apr 28, 2015 at 11:08:08PM +0300, Abdiel Janulgue wrote:
>> The resource streamer is able to gather and pack sparsely-located
>> constant data from any buffer object by a referring to a gather table
>> This patch adds support for keeping track of these constant data
>> fetches into a gather table.
>>
>> The gather table is generated from two sources. Ordinary uniform fetches
>> are stored first. These are then combined with a separate table containing
>> UBO entries. The separate entry for UBOs is needed to make it easier to
>> generate the gather mask when combining and packing the constant data.
>>
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.h  |  9 +
>>  src/mesa/drivers/dri/i965/brw_gs.c   |  4 
>>  src/mesa/drivers/dri/i965/brw_program.c  |  5 +
>>  src/mesa/drivers/dri/i965/brw_shader.cpp |  4 +++-
>>  src/mesa/drivers/dri/i965/brw_shader.h   | 11 +++
>>  src/mesa/drivers/dri/i965/brw_vs.c   |  5 +
>>  src/mesa/drivers/dri/i965/brw_wm.c   |  5 +
>>  7 files changed, 42 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index 7fd49e9..e25c64d 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -355,9 +355,12 @@ struct brw_stage_prog_data {
>>  
>> GLuint nr_params;   /**< number of float params/constants */
>> GLuint nr_pull_params;
>> +   GLuint nr_ubo_params;
>> +   GLuint nr_gather_table;
> 
> I would introduce these as non gl-types - we are trying to move away from
> them. Perhaps change "nr_params" and "nr_pull_params" while you are at it.
> 
>>  
>> unsigned curb_read_length;
>> unsigned total_scratch;
>> +   unsigned max_ubo_const_block;
>>  
>> /**
>>  * Register where the thread expects to find input data from the URB
>> @@ -375,6 +378,12 @@ struct brw_stage_prog_data {
>>  */
>> const gl_constant_value **param;
>> const gl_constant_value **pull_param;
>> +   struct {
>> +  int reg;
>> +  unsigned channel_mask;
>> +  unsigned const_block;
>> +  unsigned const_offset;
>> +   } *gather_table;
>>  };
> 
> Below in brw_shader.h you do:
> 
>struct gather_table {
>   int reg;
>   unsigned channel_mask;
>   unsigned const_block;
>   unsigned const_offset;
>};
>gather_table *ubo_gather_table;
> 
> Why not here?

I guess you're right. Yep, I can probably re-use the same definition in
the next version. Thanks!

> 
>>  
>>  /* Data about a particular attempt to compile a program.  Note that
>> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
>> b/src/mesa/drivers/dri/i965/brw_gs.c
>> index bea90d8..97658d5 100644
>> --- a/src/mesa/drivers/dri/i965/brw_gs.c
>> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
>> @@ -70,6 +70,10 @@ brw_compile_gs_prog(struct brw_context *brw,
>> c.prog_data.base.base.pull_param =
>>rzalloc_array(NULL, const gl_constant_value *, param_count);
>> c.prog_data.base.base.nr_params = param_count;
>> +   c.prog_data.base.base.nr_gather_table = 0;
>> +   c.prog_data.base.base.gather_table =
>> +  rzalloc_size(NULL, sizeof(*c.prog_data.base.base.gather_table) *
>> +   (c.prog_data.base.base.nr_params + 
>> c.prog_data.base.base.nr_ubo_params));
> 
> Wrap this line.
> 
>>  
>> if (brw->gen >= 7) {
>>if (gp->program.OutputType == GL_POINTS) {
>> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
>> b/src/mesa/drivers/dri/i965/brw_program.c
>> index 81a0c19..f27c799 100644
>> --- a/src/mesa/drivers/dri/i965/brw_program.c
>> +++ b/src/mesa/drivers/dri/i965/brw_program.c
>> @@ -573,6 +573,10 @@ brw_stage_prog_data_compare(const struct 
>> brw_stage_prog_data *a,
>> if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void 
>> *)))
>>return false;
>>  
>> +   if (memcmp(a->gather_table, b->gather_table,
>> +  a->nr_gather_table * sizeof(*a->gather_table)))
>> +  return false;
>> +
>> return true;
>>  }
>>  
>> @@ -583,6 +587,7 @@ brw_stage_prog_data_free(const void *p)
>>  
>> ralloc_free(prog_data->param);
>> ralloc_free(prog_data->pull_param);
>> +   ralloc_free(prog_data->gather_table);
>>  }
>>  
>>  void
>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
>> b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> index 0d6ac0c..8769f67 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> @@ -739,11 +739,13 @@ backend_visitor::backend_visitor(struct brw_context 
>> *brw,
>>   prog(prog),
>>   stage_prog_data(stage_prog_data),
>>   cfg(NULL),
>> - stage(stage)
>> + stage(stage),
>> + ubo_gather_table(NULL)
>>  {
>> debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
>> stage_name = _mesa_shader_stage_to_string(stage);

Re: [Mesa-dev] [PATCH 11/27] i965: Store gather table information in the program data

2015-05-07 Thread Pohjolainen, Topi
On Tue, Apr 28, 2015 at 11:08:08PM +0300, Abdiel Janulgue wrote:
> The resource streamer is able to gather and pack sparsely-located
> constant data from any buffer object by a referring to a gather table
> This patch adds support for keeping track of these constant data
> fetches into a gather table.
> 
> The gather table is generated from two sources. Ordinary uniform fetches
> are stored first. These are then combined with a separate table containing
> UBO entries. The separate entry for UBOs is needed to make it easier to
> generate the gather mask when combining and packing the constant data.
> 
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h  |  9 +
>  src/mesa/drivers/dri/i965/brw_gs.c   |  4 
>  src/mesa/drivers/dri/i965/brw_program.c  |  5 +
>  src/mesa/drivers/dri/i965/brw_shader.cpp |  4 +++-
>  src/mesa/drivers/dri/i965/brw_shader.h   | 11 +++
>  src/mesa/drivers/dri/i965/brw_vs.c   |  5 +
>  src/mesa/drivers/dri/i965/brw_wm.c   |  5 +
>  7 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 7fd49e9..e25c64d 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -355,9 +355,12 @@ struct brw_stage_prog_data {
>  
> GLuint nr_params;   /**< number of float params/constants */
> GLuint nr_pull_params;
> +   GLuint nr_ubo_params;
> +   GLuint nr_gather_table;

I would introduce these as non gl-types - we are trying to move away from
them. Perhaps change "nr_params" and "nr_pull_params" while you are at it.

>  
> unsigned curb_read_length;
> unsigned total_scratch;
> +   unsigned max_ubo_const_block;
>  
> /**
>  * Register where the thread expects to find input data from the URB
> @@ -375,6 +378,12 @@ struct brw_stage_prog_data {
>  */
> const gl_constant_value **param;
> const gl_constant_value **pull_param;
> +   struct {
> +  int reg;
> +  unsigned channel_mask;
> +  unsigned const_block;
> +  unsigned const_offset;
> +   } *gather_table;
>  };

Below in brw_shader.h you do:

   struct gather_table {
  int reg;
  unsigned channel_mask;
  unsigned const_block;
  unsigned const_offset;
   };
   gather_table *ubo_gather_table;

Why not here?

>  
>  /* Data about a particular attempt to compile a program.  Note that
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> b/src/mesa/drivers/dri/i965/brw_gs.c
> index bea90d8..97658d5 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -70,6 +70,10 @@ brw_compile_gs_prog(struct brw_context *brw,
> c.prog_data.base.base.pull_param =
>rzalloc_array(NULL, const gl_constant_value *, param_count);
> c.prog_data.base.base.nr_params = param_count;
> +   c.prog_data.base.base.nr_gather_table = 0;
> +   c.prog_data.base.base.gather_table =
> +  rzalloc_size(NULL, sizeof(*c.prog_data.base.base.gather_table) *
> +   (c.prog_data.base.base.nr_params + 
> c.prog_data.base.base.nr_ubo_params));

Wrap this line.

>  
> if (brw->gen >= 7) {
>if (gp->program.OutputType == GL_POINTS) {
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
> b/src/mesa/drivers/dri/i965/brw_program.c
> index 81a0c19..f27c799 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -573,6 +573,10 @@ brw_stage_prog_data_compare(const struct 
> brw_stage_prog_data *a,
> if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void 
> *)))
>return false;
>  
> +   if (memcmp(a->gather_table, b->gather_table,
> +  a->nr_gather_table * sizeof(*a->gather_table)))
> +  return false;
> +
> return true;
>  }
>  
> @@ -583,6 +587,7 @@ brw_stage_prog_data_free(const void *p)
>  
> ralloc_free(prog_data->param);
> ralloc_free(prog_data->pull_param);
> +   ralloc_free(prog_data->gather_table);
>  }
>  
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 0d6ac0c..8769f67 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -739,11 +739,13 @@ backend_visitor::backend_visitor(struct brw_context 
> *brw,
>   prog(prog),
>   stage_prog_data(stage_prog_data),
>   cfg(NULL),
> - stage(stage)
> + stage(stage),
> + ubo_gather_table(NULL)
>  {
> debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
> stage_name = _mesa_shader_stage_to_string(stage);
> stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
> +   this->nr_ubo_gather_table = 0;

Any particular reason not to do this in the initializer along with the other
members?

>  }
>  
>  bool
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
> b/src/mesa/drivers/dri/i965/brw_shader.h
> i

[Mesa-dev] [PATCH 11/27] i965: Store gather table information in the program data

2015-04-28 Thread Abdiel Janulgue
The resource streamer is able to gather and pack sparsely-located
constant data from any buffer object by a referring to a gather table
This patch adds support for keeping track of these constant data
fetches into a gather table.

The gather table is generated from two sources. Ordinary uniform fetches
are stored first. These are then combined with a separate table containing
UBO entries. The separate entry for UBOs is needed to make it easier to
generate the gather mask when combining and packing the constant data.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_context.h  |  9 +
 src/mesa/drivers/dri/i965/brw_gs.c   |  4 
 src/mesa/drivers/dri/i965/brw_program.c  |  5 +
 src/mesa/drivers/dri/i965/brw_shader.cpp |  4 +++-
 src/mesa/drivers/dri/i965/brw_shader.h   | 11 +++
 src/mesa/drivers/dri/i965/brw_vs.c   |  5 +
 src/mesa/drivers/dri/i965/brw_wm.c   |  5 +
 7 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 7fd49e9..e25c64d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -355,9 +355,12 @@ struct brw_stage_prog_data {
 
GLuint nr_params;   /**< number of float params/constants */
GLuint nr_pull_params;
+   GLuint nr_ubo_params;
+   GLuint nr_gather_table;
 
unsigned curb_read_length;
unsigned total_scratch;
+   unsigned max_ubo_const_block;
 
/**
 * Register where the thread expects to find input data from the URB
@@ -375,6 +378,12 @@ struct brw_stage_prog_data {
 */
const gl_constant_value **param;
const gl_constant_value **pull_param;
+   struct {
+  int reg;
+  unsigned channel_mask;
+  unsigned const_block;
+  unsigned const_offset;
+   } *gather_table;
 };
 
 /* Data about a particular attempt to compile a program.  Note that
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
b/src/mesa/drivers/dri/i965/brw_gs.c
index bea90d8..97658d5 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -70,6 +70,10 @@ brw_compile_gs_prog(struct brw_context *brw,
c.prog_data.base.base.pull_param =
   rzalloc_array(NULL, const gl_constant_value *, param_count);
c.prog_data.base.base.nr_params = param_count;
+   c.prog_data.base.base.nr_gather_table = 0;
+   c.prog_data.base.base.gather_table =
+  rzalloc_size(NULL, sizeof(*c.prog_data.base.base.gather_table) *
+   (c.prog_data.base.base.nr_params + 
c.prog_data.base.base.nr_ubo_params));
 
if (brw->gen >= 7) {
   if (gp->program.OutputType == GL_POINTS) {
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 81a0c19..f27c799 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -573,6 +573,10 @@ brw_stage_prog_data_compare(const struct 
brw_stage_prog_data *a,
if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void 
*)))
   return false;
 
+   if (memcmp(a->gather_table, b->gather_table,
+  a->nr_gather_table * sizeof(*a->gather_table)))
+  return false;
+
return true;
 }
 
@@ -583,6 +587,7 @@ brw_stage_prog_data_free(const void *p)
 
ralloc_free(prog_data->param);
ralloc_free(prog_data->pull_param);
+   ralloc_free(prog_data->gather_table);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 0d6ac0c..8769f67 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -739,11 +739,13 @@ backend_visitor::backend_visitor(struct brw_context *brw,
  prog(prog),
  stage_prog_data(stage_prog_data),
  cfg(NULL),
- stage(stage)
+ stage(stage),
+ ubo_gather_table(NULL)
 {
debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
stage_name = _mesa_shader_stage_to_string(stage);
stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
+   this->nr_ubo_gather_table = 0;
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index 8a3263e..db0018f 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -204,6 +204,17 @@ public:
void assign_common_binding_table_offsets(uint32_t 
next_binding_table_offset);
 
virtual void invalidate_live_intervals() = 0;
+
+   /** Gather table entries for UBOs */
+   unsigned nr_ubo_gather_table;
+
+   struct gather_table {
+  int reg;
+  unsigned channel_mask;
+  unsigned const_block;
+  unsigned const_offset;
+   };
+   gather_table *ubo_gather_table;
 };
 
 uint32_t brw_texture_offset(struct gl_context *ctx, int *offsets,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index dabff43..52333c9 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/