On Tue, 2015-12-01 at 13:00 -0800, Jordan Justen wrote: > On 2015-12-01 04:45:05, Iago Toral wrote: > > On Tue, 2015-12-01 at 00:19 -0800, Jordan Justen wrote: > > > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> > > > --- > > > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 52 > > > ++++++++++++++++++++++++++++++++ > > > 1 file changed, 52 insertions(+) > > > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > index 12a8b59..6cbb0e2 100644 > > > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > @@ -2472,6 +2472,58 @@ fs_visitor::nir_emit_intrinsic(const fs_builder > > > &bld, nir_intrinsic_instr *instr > > > break; > > > } > > > > > > + case nir_intrinsic_store_shared_indirect: > > > + has_indirect = true; > > > + /* fallthrough */ > > > + case nir_intrinsic_store_shared: { > > > + assert(devinfo->gen >= 7); > > > + > > > + /* Block index */ > > > + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); > > > + > > > + /* Offset */ > > > + fs_reg offset_reg = vgrf(glsl_type::uint_type); > > > + unsigned const_offset_bytes = 0; > > > + if (has_indirect) { > > > + bld.MOV(offset_reg, get_nir_src(instr->src[1])); > > > + } else { > > > + const_offset_bytes = instr->const_index[0]; > > > + bld.MOV(offset_reg, brw_imm_ud(const_offset_bytes)); > > > + } > > > + > > > + /* Value */ > > > + fs_reg val_reg = get_nir_src(instr->src[0]); > > > + > > > + /* Writemask */ > > > + unsigned writemask = instr->const_index[1]; > > > + > > > + /* Write each component present in the writemask */ > > > > I made a comment in v2 that this loop is based on early ssbo code that > > was not optimized (it always emits a write for each component). The > > current implementation for ssbo store is better and I think it is only a > > matter of copying the same loop here, since the implementation is the > > same as in the case of ssbos. 
> > Noted (in v2 :) > > http://lists.freedesktop.org/archives/mesa-dev/2015-November/101866.html > > -Jordan
Ah, sorry, I had missed your reply. I have just reviewed v4 of the patch, which includes the optimized version. Iago > > > > > + unsigned skipped_channels = 0; > > > + for (int i = 0; i < instr->num_components; i++) { > > > + int component_mask = 1 << i; > > > + if (writemask & component_mask) { > > > + if (skipped_channels) { > > > + if (!has_indirect) { > > > + const_offset_bytes += 4 * skipped_channels; > > > + bld.MOV(offset_reg, brw_imm_ud(const_offset_bytes)); > > > + } else { > > > + bld.ADD(offset_reg, offset_reg, > > > + brw_imm_ud(4 * skipped_channels)); > > > + } > > > + skipped_channels = 0; > > > + } > > > + > > > + emit_untyped_write(bld, surf_index, offset_reg, > > > + offset(val_reg, bld, i), > > > + 1 /* dims */, 1 /* size */, > > > + BRW_PREDICATE_NONE); > > > + } > > > + > > > + skipped_channels++; > > > + } > > > + break; > > > + } > > > + > > > case nir_intrinsic_load_input_indirect: > > > has_indirect = true; > > > /* fallthrough */ > > > > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev