On Thu, Dec 10, 2015 at 1:25 AM, Michael Schellenberger Costa <mschellenbergerco...@googlemail.com> wrote: > Hi Jason,
Hi! Please remember to reply-all so it goes to the list. :-) > Am 10.12.2015 um 05:23 schrieb Jason Ekstrand: >> This commit moves us to an instruction based model rather than a >> register-based model for indirects. This is more accurate anyway as we >> have to emit instructions to resolve the reladdr. It's also a lot simpler >> because it gets rid of the recursive reladdr problem by design. >> >> One side-effect of this is that we need a whole new algorithm in >> move_uniform_array_access_to_pull_constants. This new algorithm is much >> more straightforward than the old one and is fairly similar to what we're >> already doing in the FS backend. >> --- >> src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- >> src/mesa/drivers/dri/i965/brw_vec4.h | 3 +- >> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 +-- >> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 86 >> ++++++++++++-------------- >> 4 files changed, 50 insertions(+), 51 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> index a697bdf..e4a405b 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> @@ -775,7 +775,7 @@ vec4_visitor::move_push_constants_to_pull_constants() >> dst_reg temp = dst_reg(this, glsl_type::vec4_type); >> >> emit_pull_constant_load(block, inst, temp, inst->src[i], >> - pull_constant_loc[uniform]); >> + pull_constant_loc[uniform], src_reg()); >> >> inst->src[i].file = temp.file; >> inst->src[i].nr = temp.nr; >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h >> b/src/mesa/drivers/dri/i965/brw_vec4.h >> index f2e5ce1..e6d6c82 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.h >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h >> @@ -293,7 +293,8 @@ public: >> void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, >> dst_reg dst, >> src_reg orig_src, >> - int base_offset); >> + int base_offset, >> + src_reg indirect); >> void emit_pull_constant_load_reg(dst_reg dst, >> src_reg surf_index, >> src_reg offset, >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> index f965b39..58b6612 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> @@ -673,12 +673,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr >> *instr) >> /* Offsets are in bytes but they should always be multiples of 16 >> */ >> assert(const_offset->u[0] % 16 == 0); >> src.reg_offset = const_offset->u[0] / 16; >> + >> + emit(MOV(dest, src)); >> } else { >> - src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); >> - src.reladdr = new(mem_ctx) src_reg(tmp); >> - } >> + src_reg indirect = get_nir_src(instr->src[0], >> BRW_REGISTER_TYPE_UD, 1); >> >> - emit(MOV(dest, src)); >> + emit(SHADER_OPCODE_MOV_INDIRECT, dest, src, >> + indirect, brw_imm_ud(instr->const_index[1])); >> + } >> break; >> } >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> index 7712d34..e7ab536 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> @@ -1641,16 +1641,16 @@ vec4_visitor::move_grf_array_access_to_scratch() >> void >> vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction >> *inst, >> dst_reg temp, src_reg orig_src, >> - int base_offset) >> + int base_offset, src_reg indirect) >> { >> int reg_offset = base_offset + orig_src.reg_offset; >> const unsigned index = >> prog_data->base.binding_table.pull_constants_start; >> >> src_reg offset; >> - if (orig_src.reladdr) { >> + if (indirect.file != BAD_FILE) { >> offset = src_reg(this, glsl_type::int_type); >> >> - emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr, >> + emit_before(block, inst, ADD(dst_reg(offset), indirect, >> brw_imm_d(reg_offset * 16))); >> } else if (devinfo->gen >= 8) { >> /* Store the offset in a GRF so we can send-from-GRF. */ >> @@ -1685,59 +1685,55 @@ >> vec4_visitor::move_uniform_array_access_to_pull_constants() >> { >> int pull_constant_loc[this->uniforms]; >> memset(pull_constant_loc, -1, sizeof(pull_constant_loc)); >> - bool nested_reladdr; >> >> - /* Walk through and find array access of uniforms. Put a copy of that >> - * uniform in the pull constant buffer. >> - * >> - * Note that we don't move constant-indexed accesses to arrays. No >> - * testing has been done of the performance impact of this choice. >> + /* First, walk through the instructions and determine which things need >> to >> + * be pulled. We mark something as needing to bye pulled by setting > > to be pulled Thanks. Fixed! >> + * pull_constant_loc to 0. >> */ >> - do { >> - nested_reladdr = false; >> - >> - foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { >> - for (int i = 0 ; i < 3; i++) { >> - if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr) >> - continue; >> + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { >> + /* We only care about MOV_INDIRECT of a uniform */ >> + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || >> + inst->src[0].file != UNIFORM) >> + continue; >> >> - int uniform = inst->src[i].nr; >> + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; > > In 03/15 you have > unsigned location = inst->src[i].nr + inst->src[i].reg_offset; > > Maybe use the same type/name? Yeah, we could. We aren't terribly consistent about it. In general (I've edited most of the places recently), we use "uniform", "location", and "constant_nr". I can switch to one of the others if you'd like, but we're not really consistent. >> >> - if (inst->src[i].reladdr->reladdr) >> - nested_reladdr = true; /* will need another pass */ >> + for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++) >> + pull_constant_loc[uniform_nr + j] = 0; >> + } >> >> - /* If this array isn't already present in the pull constant >> buffer, >> - * add it. >> - */ >> - if (pull_constant_loc[uniform] == -1) { >> - const gl_constant_value **values = >> - &stage_prog_data->param[uniform * 4]; >> + /* Next, we walk the list of uniforms and assign real pull constant >> + * locations and set their corresponding entries in pull_param. >> + */ >> + for (int j = 0; j < this->uniforms; j++) { >> + if (pull_constant_loc[j] < 0) >> + continue; >> >> - pull_constant_loc[uniform] = stage_prog_data->nr_pull_params >> / 4; >> + pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4; >> >> - assert(uniform < uniform_array_size); >> - for (int j = 0; j < uniform_size[uniform] * 4; j++) { >> - >> stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] >> - = values[j]; >> - } >> - } >> + for (int i = 0; i < 4; i++) { > Is that 4 correct? Yes, each uniform is a vec4 so it takes up 4 slots. --Jason > Michael >> + stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] >> + = stage_prog_data->param[j * 4 + i]; >> + } >> + } >> >> - /* Set up the annotation tracking for new generated >> instructions. */ >> - base_ir = inst->ir; >> - current_annotation = inst->annotation; >> + /* Finally, we can walk through the instructions and lower MOV_INDIRECT >> + * instructions to actual uniform pulls. >> + */ >> + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { >> + /* We only care about MOV_INDIRECT of a uniform */ >> + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || >> + inst->src[0].file != UNIFORM) >> + continue; >> >> - dst_reg temp = dst_reg(this, glsl_type::vec4_type); >> + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; >> >> - emit_pull_constant_load(block, inst, temp, inst->src[i], >> - pull_constant_loc[uniform]); >> + assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP); >> >> - inst->src[i].file = temp.file; >> - inst->src[i].nr = temp.nr; >> - inst->src[i].reg_offset = temp.reg_offset; >> - inst->src[i].reladdr = NULL; >> - } >> - } >> - } while (nested_reladdr); >> + emit_pull_constant_load(block, inst, inst->dst, inst->src[0], >> + pull_constant_loc[uniform_nr], inst->src[1]); >> + inst->remove(block); >> + } >> >> /* Now there are no accesses of the UNIFORM file with a reladdr, so >> * no need to track them as larger-than-vec4 objects. This will be >> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev