On Fri, 2016-07-15 at 21:20 +1000, Timothy Arceri wrote: > On Fri, 2016-07-15 at 13:16 +0200, Iago Toral wrote: > > > > On Fri, 2016-07-15 at 20:59 +1000, Timothy Arceri wrote: > > > > > > On Fri, 2016-07-15 at 11:04 +0200, Iago Toral Quiroga wrote: > > > > > > > > > > > > We totally ignored this before because there were no piglit > > > > tests > > > > for > > > > indirect loads in tessellation stages with doubles. > > > > > > > > Cc: "12.0" <mesa-sta...@lists.freedesktop.org> > > > > --- > > > > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 86 > > > > ++++++++++++++++++++++++-------- > > > > 1 file changed, 64 insertions(+), 22 deletions(-) > > > > > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > > index 55383ff..7d5af78 100644 > > > > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > > > @@ -2926,31 +2926,73 @@ > > > > fs_visitor::nir_emit_tes_intrinsic(const > > > > fs_builder &bld, > > > > } > > > > } else { > > > > /* Indirect indexing - use per-slot offsets as well. > > > > */ > > > > - const fs_reg srcs[] = { > > > > - retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), > > > > - indirect_offset > > > > - }; > > > > - fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); > > > > - bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); > > > > > > > > - if (first_component != 0) { > > > > - unsigned read_components = > > > > - instr->num_components + first_component; > > > > - fs_reg tmp = bld.vgrf(dest.type, read_components); > > > > - inst = > > > > bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, > > > > tmp, > > > > - payload); > > > > - inst->regs_written = read_components; > > > > - for (unsigned i = 0; i < instr->num_components; > > > > i++) > > > > { > > > > - bld.MOV(offset(dest, bld, i), > > > > - offset(tmp, bld, i + first_component)); > > > > + /* We can only read two double components with each > > > > URB > > > > read, so > > > > + * we send two read messages in that case, each one > > > > loading > > > > up to > > > > + * two double components. > > > > + */ > > > > + unsigned num_iterations = 1; > > > > + unsigned num_components = instr->num_components; > > > > + fs_reg orig_dest = dest; > > > > + if (type_sz(dest.type) == 8) { > > > Hi, > > > > > > Not your fault as its not currently enabled but can you add the > > > following here to allow component packing to work once it finally > > > gets > > > enabled: > > > > > > first_component = first_component / 2; > > > > > > I believe that should be enought to keep it working once the > > > other > > > patches land. > > I think this is already being done in master, right after the > > assignment to first_component. > Oh yes you are right. All looks good to me then :)
great! > > > > > > > > > > Also I don't think this patch is going to apply cleanly to stable > > > as > > > the component packing cahnges only landed after branching. Aha, good point. In that case I think it is probably better to just drop Cc to stable. Thanks! Iago > > > Thanks, > > > Tim > > > > > > > > > > > > > > > + if (instr->num_components > 2) { > > > > + num_iterations = 2; > > > > + num_components = 2; > > > > + } > > > > + fs_reg tmp = fs_reg(VGRF, alloc.allocate(4), > > > > dest.type); > > > > + dest = tmp; > > > > + } > > > > + > > > > + for (unsigned iter = 0; iter < num_iterations; > > > > iter++) > > > > { > > > > + const fs_reg srcs[] = { > > > > + retype(brw_vec1_grf(0, 0), > > > > BRW_REGISTER_TYPE_UD), > > > > + indirect_offset > > > > + }; > > > > + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, > > > > 2); > > > > + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), > > > > 0); > > > > + > > > > + if (first_component != 0) { > > > > + unsigned read_components = > > > > + num_components + first_component; > > > > + fs_reg tmp = bld.vgrf(dest.type, > > > > read_components); > > > > + inst = > > > > bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, > > > > + payload); > > > > + for (unsigned i = 0; i < num_components; i++) { > > > > + bld.MOV(offset(dest, bld, i), > > > > + offset(tmp, bld, i + > > > > first_component)); > > > > + } > > > > + } else { > > > > + inst = > > > > bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, > > > > + payload); > > > > + } > > > > + inst->mlen = 2; > > > > + inst->offset = imm_offset; > > > > + inst->regs_written = > > > > + ((num_components + first_component) * > > > > type_sz(dest.type) / 4); > > > > + > > > > + /* If we are reading 64-bit data using 32-bit read > > > > messages we need > > > > + * build proper 64-bit data elements by shuffling > > > > the > > > > low and high > > > > + * 32-bit components around like we do for other > > > > things > > > > like UBOs > > > > + * or SSBOs. > > > > + */ > > > > + if (type_sz(dest.type) == 8) { > > > > + shuffle_32bit_load_result_to_64bit_data( > > > > + bld, dest, retype(dest, > > > > BRW_REGISTER_TYPE_F), > > > > num_components); > > > > + > > > > + for (unsigned c = 0; c < num_components; c++) { > > > > + bld.MOV(offset(orig_dest, bld, iter * 2 + > > > > c), > > > > + offset(dest, bld, c)); > > > > + } > > > > + } > > > > + > > > > + /* If we are loading double data and we need a > > > > second > > > > read message > > > > + * adjust the offset > > > > + */ > > > > + if (num_iterations > 1) { > > > > + num_components = instr->num_components - 2; > > > > + imm_offset++; > > > > } > > > > - } else { > > > > - inst = > > > > bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, > > > > dest, > > > > - payload); > > > > - inst->regs_written = instr->num_components; > > > > } > > > > - inst->mlen = 2; > > > > - inst->offset = imm_offset; > > > > } > > > > break; > > > > } > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev