Re: [Mesa-dev] [PATCH 08/15] i965/vec4: Use MOV_INDIRECT instead of reladdr for indirect push constants
On Thu, Dec 10, 2015 at 1:25 AM, Michael Schellenberger Costawrote: > Hi Jason, Hi! Please remember to reply-all so it goes to the list. :-) > Am 10.12.2015 um 05:23 schrieb Jason Ekstrand: >> This commit moves us to an instruction based model rather than a >> register-based model for indirects. This is more accurate anyway as we >> have to emit instructions to resolve the reladdr. It's also a lot simpler >> because it gets rid of the recursive reladdr problem by design. >> >> One side-effect of this is that we need a whole new algorithm in >> move_uniform_array_access_to_pull_constants. This new algorithm is much >> more straightforward than the old one and is fairly similar to what we're >> already doing in the FS backend. >> --- >> src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- >> src/mesa/drivers/dri/i965/brw_vec4.h | 3 +- >> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 +-- >> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 86 >> -- >> 4 files changed, 50 insertions(+), 51 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> index a697bdf..e4a405b 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> @@ -775,7 +775,7 @@ vec4_visitor::move_push_constants_to_pull_constants() >>dst_reg temp = dst_reg(this, glsl_type::vec4_type); >> >>emit_pull_constant_load(block, inst, temp, inst->src[i], >> - pull_constant_loc[uniform]); >> + pull_constant_loc[uniform], src_reg()); >> >>inst->src[i].file = temp.file; >> inst->src[i].nr = temp.nr; >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h >> b/src/mesa/drivers/dri/i965/brw_vec4.h >> index f2e5ce1..e6d6c82 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.h >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h >> @@ -293,7 +293,8 @@ public: >> void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, >> dst_reg dst, >> src_reg orig_src, >> - int base_offset); >> + int base_offset, >> +src_reg indirect); >> void emit_pull_constant_load_reg(dst_reg dst, >> src_reg surf_index, >> src_reg offset, >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> index f965b39..58b6612 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> @@ -673,12 +673,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr >> *instr) >> /* Offsets are in bytes but they should always be multiples of 16 >> */ >> assert(const_offset->u[0] % 16 == 0); >> src.reg_offset = const_offset->u[0] / 16; >> + >> + emit(MOV(dest, src)); >>} else { >> - src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); >> - src.reladdr = new(mem_ctx) src_reg(tmp); >> - } >> + src_reg indirect = get_nir_src(instr->src[0], >> BRW_REGISTER_TYPE_UD, 1); >> >> - emit(MOV(dest, src)); >> + emit(SHADER_OPCODE_MOV_INDIRECT, dest, src, >> + indirect, brw_imm_ud(instr->const_index[1])); >> + } >>break; >> } >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> index 7712d34..e7ab536 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> @@ -1641,16 +1641,16 @@ vec4_visitor::move_grf_array_access_to_scratch() >> void >> vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction >> *inst, >> dst_reg temp, src_reg orig_src, >> - int base_offset) >> + int base_offset, src_reg indirect) >> { >> int reg_offset = base_offset + orig_src.reg_offset; >> const unsigned index = >> prog_data->base.binding_table.pull_constants_start; >> >> src_reg offset; >> - if (orig_src.reladdr) { >> + if (indirect.file != BAD_FILE) { >>offset = src_reg(this, glsl_type::int_type); >> >> - emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr, >> + emit_before(block, inst, ADD(dst_reg(offset), indirect, >> brw_imm_d(reg_offset * 16))); >> } else if (devinfo->gen >= 8) { >>/* Store the offset in a GRF so we can send-from-GRF. */ >> @@ -1685,59 +1685,55 @@ >> vec4_visitor::move_uniform_array_access_to_pull_constants() >> { >> int pull_constant_loc[this->uniforms]; >> memset(pull_constant_loc, -1,
[Mesa-dev] [PATCH 08/15] i965/vec4: Use MOV_INDIRECT instead of reladdr for indirect push constants
This commit moves us to an instruction based model rather than a register-based model for indirects. This is more accurate anyway as we have to emit instructions to resolve the reladdr. It's also a lot simpler because it gets rid of the recursive reladdr problem by design. One side-effect of this is that we need a whole new algorithm in move_uniform_array_access_to_pull_constants. This new algorithm is much more straightforward than the old one and is fairly similar to what we're already doing in the FS backend. --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4.h | 3 +- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 +-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 86 -- 4 files changed, 50 insertions(+), 51 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a697bdf..e4a405b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -775,7 +775,7 @@ vec4_visitor::move_push_constants_to_pull_constants() dst_reg temp = dst_reg(this, glsl_type::vec4_type); emit_pull_constant_load(block, inst, temp, inst->src[i], -pull_constant_loc[uniform]); +pull_constant_loc[uniform], src_reg()); inst->src[i].file = temp.file; inst->src[i].nr = temp.nr; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index f2e5ce1..e6d6c82 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -293,7 +293,8 @@ public: void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg dst, src_reg orig_src, - int base_offset); + int base_offset, +src_reg indirect); void emit_pull_constant_load_reg(dst_reg dst, src_reg surf_index, src_reg offset, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index f965b39..58b6612 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -673,12 +673,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* Offsets are in bytes but they should always be multiples of 16 */ assert(const_offset->u[0] % 16 == 0); src.reg_offset = const_offset->u[0] / 16; + + emit(MOV(dest, src)); } else { - src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); - src.reladdr = new(mem_ctx) src_reg(tmp); - } + src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); - emit(MOV(dest, src)); + emit(SHADER_OPCODE_MOV_INDIRECT, dest, src, + indirect, brw_imm_ud(instr->const_index[1])); + } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 7712d34..e7ab536 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1641,16 +1641,16 @@ vec4_visitor::move_grf_array_access_to_scratch() void vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg temp, src_reg orig_src, - int base_offset) + int base_offset, src_reg indirect) { int reg_offset = base_offset + orig_src.reg_offset; const unsigned index = prog_data->base.binding_table.pull_constants_start; src_reg offset; - if (orig_src.reladdr) { + if (indirect.file != BAD_FILE) { offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr, + emit_before(block, inst, ADD(dst_reg(offset), indirect, brw_imm_d(reg_offset * 16))); } else if (devinfo->gen >= 8) { /* Store the offset in a GRF so we can send-from-GRF. */ @@ -1685,59 +1685,55 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() { int pull_constant_loc[this->uniforms]; memset(pull_constant_loc, -1, sizeof(pull_constant_loc)); - bool nested_reladdr; - /* Walk through and find array access of uniforms. Put a copy of that -* uniform in the pull constant buffer. -* -* Note that we don't move constant-indexed accesses to arrays. No -* testing has been done of the performance impact of this choice. + /* First, walk through the instructions and determine which things need to +* be pulled. We mark something as needing to bye pulled by setting +* pull_constant_loc to 0. */ - do { -