On Wed, Dec 9, 2015 at 8:23 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > Instead of using reladdr, this commit changes the FS backend to emit a > MOV_INDIRECT whenever we need an indirect uniform load. We also have to > rework some of the other bits of the backend to handle this new form of > uniform load. The obvious change is that demote_pull_constants now acts > more like a lowering pass when it hits a MOV_INDIRECT. > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 72 > +++++++++++++++++++------------- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 53 ++++++++++++++++++----- > 2 files changed, 86 insertions(+), 39 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index bf446d2..7cc03c5 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -1945,8 +1945,8 @@ fs_visitor::assign_constant_locations() > if (inst->src[i].file != UNIFORM) > continue; > > - if (inst->src[i].reladdr) { > - int uniform = inst->src[i].nr; > + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { > + int uniform = inst->src[0].nr; > > /* If this array isn't already present in the pull constant > buffer, > * add it. > @@ -2028,49 +2028,63 @@ fs_visitor::assign_constant_locations() > void > fs_visitor::demote_pull_constants() > { > - foreach_block_and_inst (block, fs_inst, inst, cfg) { > + const unsigned index = > stage_prog_data->binding_table.pull_constants_start; > + > + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { > + /* Set up the annotation tracking for new generated instructions. */ > + const fs_builder ibld(this, block, inst); > + > for (int i = 0; i < inst->sources; i++) { > if (inst->src[i].file != UNIFORM) > continue; > > - int pull_index; > + /* We'll handle this case later */ > + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) > + continue; > + > unsigned location = inst->src[i].nr + inst->src[i].reg_offset; > - if (location >= uniforms) /* Out of bounds access */ > - pull_index = -1; > - else > - pull_index = pull_constant_loc[location]; > + if (location >= uniforms) > + continue; /* Out of bounds access */ > + > + int pull_index = pull_constant_loc[location]; > > if (pull_index == -1) > continue; > > - /* Set up the annotation tracking for new generated instructions. */ > - const fs_builder ibld(this, block, inst); > - const unsigned index = > stage_prog_data->binding_table.pull_constants_start; > - fs_reg dst = vgrf(glsl_type::float_type); > - > assert(inst->src[i].stride == 0); > > - /* Generate a pull load into dst. */ > - if (inst->src[i].reladdr) { > - VARYING_PULL_CONSTANT_LOAD(ibld, dst, > - brw_imm_ud(index), > - *inst->src[i].reladdr, > - pull_index * 4); > - inst->src[i].reladdr = NULL; > - inst->src[i].stride = 1; > - } else { > - const fs_builder ubld = ibld.exec_all().group(8, 0); > - struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & > ~15); > - ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, > - dst, brw_imm_ud(index), offset); > - inst->src[i].set_smear(pull_index & 3); > - } > - brw_mark_surface_used(prog_data, index); > + fs_reg dst = vgrf(glsl_type::float_type); > + const fs_builder ubld = ibld.exec_all().group(8, 0); > + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & > ~15); > + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, > + dst, brw_imm_ud(index), offset); > > /* Rewrite the instruction to use the temporary VGRF. */ > inst->src[i].file = VGRF; > inst->src[i].nr = dst.nr; > inst->src[i].reg_offset = 0; > + inst->src[i].set_smear(pull_index & 3); > + > + brw_mark_surface_used(prog_data, index); > + } > + > + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && > + inst->src[0].file == UNIFORM) { > + > + unsigned location = inst->src[0].nr + inst->src[0].reg_offset; > + if (location >= uniforms) > + continue; /* Out of bounds access */ > + > + int pull_index = pull_constant_loc[location]; > + assert(pull_index >= 0); /* This had better be pull */ > + > + VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst, > + brw_imm_ud(index), > + inst->src[1], > + pull_index * 4); > + inst->remove(block); > + > + brw_mark_surface_used(prog_data, index); > } > } > invalidate_live_intervals(); > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 15d9b1c..bf239c3 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -1136,6 +1136,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var > *deref) > { > fs_reg image(UNIFORM, deref->var->data.driver_location / 4, > BRW_REGISTER_TYPE_UD); > + fs_reg indirect; > + unsigned indirect_max = 0; > > for (const nir_deref *tail = &deref->deref; tail->child; > tail = tail->child) { > @@ -1147,7 +1149,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var > *deref) > image = offset(image, bld, base * element_size); > > if (deref_array->deref_array_type == nir_deref_array_type_indirect) { > - fs_reg tmp = vgrf(glsl_type::int_type); > + fs_reg tmp = vgrf(glsl_type::uint_type); > > if (devinfo->gen == 7 && !devinfo->is_haswell) { > /* IVB hangs when trying to access an invalid surface index with > @@ -1166,14 +1168,29 @@ fs_visitor::get_nir_image_deref(const nir_deref_var > *deref) > } > > bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4)); > - if (image.reladdr) > - bld.ADD(*image.reladdr, *image.reladdr, tmp); > - else > - image.reladdr = new(mem_ctx) fs_reg(tmp); > + if (indirect.file == BAD_FILE) { > + indirect = tmp; > + indirect_max = type_size_scalar(tail->type) - > BRW_IMAGE_PARAM_SIZE; > + } else { > + bld.ADD(indirect, indirect, tmp); > + } > } > } > > - return image; > + if (indirect.file == BAD_FILE) { > + return image; > + } else { > + /* Emit a pile of MOVs to load the uniform into a temporary. The > + * dead-code elimination pass will get rid of what we don't use. > + */ > + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE); > + for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) { > + bld.emit(SHADER_OPCODE_MOV_INDIRECT, > + offset(tmp, bld, j), offset(image, bld, j), > + indirect, brw_imm_ud((indirect_max + 1) * 4)); > + } > + return tmp; > + } > } > > void > @@ -2302,12 +2319,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > /* Offsets are in bytes but they should always be multiples of 4 */ > assert(const_offset->u[0] % 4 == 0); > src.reg_offset = const_offset->u[0] / 4; > + > + for (unsigned j = 0; j < instr->num_components; j++) { > + bld.MOV(offset(dest, bld, j), offset(src, bld, j)); > + } > } else { > - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); > - } > + fs_reg indirect = retype(get_nir_src(instr->src[0]), > + BRW_REGISTER_TYPE_UD); > > - for (unsigned j = 0; j < instr->num_components; j++) { > - bld.MOV(offset(dest, bld, j), offset(src, bld, j)); > + /* Se need to pass a size to the MOV_INDIRECT but we don't want it > to
s/Se/We/ _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev