Re: [Mesa-dev] [PATCH 08/15] i965/vec4: Use MOV_INDIRECT instead of reladdr for indirect push constants

2015-12-10 Thread Jason Ekstrand
On Thu, Dec 10, 2015 at 1:25 AM, Michael Schellenberger Costa
 wrote:
> Hi Jason,

Hi!  Please remember to reply-all so it goes to the list. :-)

> Am 10.12.2015 um 05:23 schrieb Jason Ekstrand:
>> This commit moves us to an instruction based model rather than a
>> register-based model for indirects.  This is more accurate anyway as we
>> have to emit instructions to resolve the reladdr.  It's also a lot simpler
>> because it gets rid of the recursive reladdr problem by design.
>>
>> One side-effect of this is that we need a whole new algorithm in
>> move_uniform_array_access_to_pull_constants.  This new algorithm is much
>> more straightforward than the old one and is fairly similar to what we're
>> already doing in the FS backend.
>> ---
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp |  2 +-
>>  src/mesa/drivers/dri/i965/brw_vec4.h   |  3 +-
>>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 +--
>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 86 
>> --
>>  4 files changed, 50 insertions(+), 51 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index a697bdf..e4a405b 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -775,7 +775,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
>>dst_reg temp = dst_reg(this, glsl_type::vec4_type);
>>
>>emit_pull_constant_load(block, inst, temp, inst->src[i],
>> -  pull_constant_loc[uniform]);
>> +  pull_constant_loc[uniform], src_reg());
>>
>>inst->src[i].file = temp.file;
>>   inst->src[i].nr = temp.nr;
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
>> b/src/mesa/drivers/dri/i965/brw_vec4.h
>> index f2e5ce1..e6d6c82 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
>> @@ -293,7 +293,8 @@ public:
>> void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
>>   dst_reg dst,
>>   src_reg orig_src,
>> - int base_offset);
>> + int base_offset,
>> +src_reg indirect);
>> void emit_pull_constant_load_reg(dst_reg dst,
>>  src_reg surf_index,
>>  src_reg offset,
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> index f965b39..58b6612 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> @@ -673,12 +673,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
>> *instr)
>>   /* Offsets are in bytes but they should always be multiples of 16 
>> */
>>   assert(const_offset->u[0] % 16 == 0);
>>   src.reg_offset = const_offset->u[0] / 16;
>> +
>> + emit(MOV(dest, src));
>>} else {
>> - src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
>> - src.reladdr = new(mem_ctx) src_reg(tmp);
>> -  }
>> + src_reg indirect = get_nir_src(instr->src[0], 
>> BRW_REGISTER_TYPE_UD, 1);
>>
>> -  emit(MOV(dest, src));
>> + emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
>> +  indirect, brw_imm_ud(instr->const_index[1]));
>> +  }
>>break;
>> }
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> index 7712d34..e7ab536 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> @@ -1641,16 +1641,16 @@ vec4_visitor::move_grf_array_access_to_scratch()
>>  void
>>  vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction 
>> *inst,
>> dst_reg temp, src_reg orig_src,
>> -   int base_offset)
>> +   int base_offset, src_reg indirect)
>>  {
>> int reg_offset = base_offset + orig_src.reg_offset;
>> const unsigned index = 
>> prog_data->base.binding_table.pull_constants_start;
>>
>> src_reg offset;
>> -   if (orig_src.reladdr) {
>> +   if (indirect.file != BAD_FILE) {
>>offset = src_reg(this, glsl_type::int_type);
>>
>> -  emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr,
>> +  emit_before(block, inst, ADD(dst_reg(offset), indirect,
>> brw_imm_d(reg_offset * 16)));
>> } else if (devinfo->gen >= 8) {
>>/* Store the offset in a GRF so we can send-from-GRF. */
>> @@ -1685,59 +1685,55 @@ 
>> vec4_visitor::move_uniform_array_access_to_pull_constants()
>>  {
>> int pull_constant_loc[this->uniforms];
>> memset(pull_constant_loc, -1, 

[Mesa-dev] [PATCH 08/15] i965/vec4: Use MOV_INDIRECT instead of reladdr for indirect push constants

2015-12-09 Thread Jason Ekstrand
This commit moves us to an instruction based model rather than a
register-based model for indirects.  This is more accurate anyway as we
have to emit instructions to resolve the reladdr.  It's also a lot simpler
because it gets rid of the recursive reladdr problem by design.

One side-effect of this is that we need a whole new algorithm in
move_uniform_array_access_to_pull_constants.  This new algorithm is much
more straightforward than the old one and is fairly similar to what we're
already doing in the FS backend.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4.h   |  3 +-
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 +--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 86 --
 4 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index a697bdf..e4a405b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -775,7 +775,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
 
 emit_pull_constant_load(block, inst, temp, inst->src[i],
-pull_constant_loc[uniform]);
+pull_constant_loc[uniform], src_reg());
 
 inst->src[i].file = temp.file;
  inst->src[i].nr = temp.nr;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index f2e5ce1..e6d6c82 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -293,7 +293,8 @@ public:
void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
-   int base_offset);
+   int base_offset,
+src_reg indirect);
void emit_pull_constant_load_reg(dst_reg dst,
 src_reg surf_index,
 src_reg offset,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index f965b39..58b6612 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -673,12 +673,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
  /* Offsets are in bytes but they should always be multiples of 16 */
  assert(const_offset->u[0] % 16 == 0);
  src.reg_offset = const_offset->u[0] / 16;
+
+ emit(MOV(dest, src));
   } else {
- src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
- src.reladdr = new(mem_ctx) src_reg(tmp);
-  }
+ src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 
1);
 
-  emit(MOV(dest, src));
+ emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
+  indirect, brw_imm_ud(instr->const_index[1]));
+  }
   break;
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 7712d34..e7ab536 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1641,16 +1641,16 @@ vec4_visitor::move_grf_array_access_to_scratch()
 void
 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
  dst_reg temp, src_reg orig_src,
- int base_offset)
+ int base_offset, src_reg indirect)
 {
int reg_offset = base_offset + orig_src.reg_offset;
const unsigned index = prog_data->base.binding_table.pull_constants_start;
 
src_reg offset;
-   if (orig_src.reladdr) {
+   if (indirect.file != BAD_FILE) {
   offset = src_reg(this, glsl_type::int_type);
 
-  emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr,
+  emit_before(block, inst, ADD(dst_reg(offset), indirect,
brw_imm_d(reg_offset * 16)));
} else if (devinfo->gen >= 8) {
   /* Store the offset in a GRF so we can send-from-GRF. */
@@ -1685,59 +1685,55 @@ 
vec4_visitor::move_uniform_array_access_to_pull_constants()
 {
int pull_constant_loc[this->uniforms];
memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
-   bool nested_reladdr;
 
-   /* Walk through and find array access of uniforms.  Put a copy of that
-* uniform in the pull constant buffer.
-*
-* Note that we don't move constant-indexed accesses to arrays.  No
-* testing has been done of the performance impact of this choice.
+   /* First, walk through the instructions and determine which things need to
+* be pulled.  We mark something as needing to bye pulled by setting
+* pull_constant_loc to 0.
 */
-   do {
-