On Wednesday, June 15, 2016 9:25:45 AM PDT Samuel Iglesias Gonsálvez wrote: > From the Cherryview's PRM, Volume 7, 3D Media GPGPU Engine, Register Region > Restrictions, page 844: > > "When source or destination datatype is 64b or operation is integer DWord > multiply, indirect addressing must not be used." > > Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > Cc: "12.0" <mesa-sta...@lists.freedesktop.org> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462 > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 > +++++++++++++++++++++++++++++--- > 1 file changed, 42 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index c271e64..be162ff 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -3602,6 +3602,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > } else { > fs_reg indirect = retype(get_nir_src(instr->src[0]), > BRW_REGISTER_TYPE_UD); > + int num_components = instr->num_components; > > /* We need to pass a size to the MOV_INDIRECT but we don't want it > to > * go past the end of the uniform. In order to keep the n'th > @@ -3609,14 +3610,51 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > * one component of the vector. > */ > assert(instr->const_index[1] >= > - instr->num_components * (int) type_sz(dest.type)); > + num_components * (int) type_sz(dest.type)); > + > unsigned read_size = instr->const_index[1] - > - (instr->num_components - 1) * type_sz(dest.type); > + (num_components - 1) * type_sz(dest.type); > + > + fs_reg temp = dest; > + fs_reg indirect_chv_high_32bit; > + bool is_cherryview_64bit = > + devinfo->is_cherryview && type_sz(dest.type) == 8; > + if (is_cherryview_64bit) { > + /* Duplicate the number of components because we will read > + * each 64-bit component with two 32-bit reads. > + */ > + num_components *= 2; > + /* Temporary destination to save the data. We will do a shuffle > + * later. > + */ > + temp = bld.vgrf(BRW_REGISTER_TYPE_UD, num_components);
I think if you just used inst->num_components * 2 here... > + indirect_chv_high_32bit = vgrf(glsl_type::uint_type); > + /* Calculate indirect address to read high 32 bits */ > + bld.ADD(indirect_chv_high_32bit, indirect, brw_imm_ud(4)); > + } > > - for (unsigned j = 0; j < instr->num_components; j++) { > + int i, j; > + for (j = 0, i = 0; i < num_components; j++, i++) { and then left it as j < instr->num_components rather than i... then you wouldn't have to make a num_components temporary nor multiply it by 2. Might be a little simpler? > bld.emit(SHADER_OPCODE_MOV_INDIRECT, > - offset(dest, bld, j), offset(src, bld, j), > + offset(temp, bld, i), offset(src, bld, j), > indirect, brw_imm_ud(read_size)); > + > + if (is_cherryview_64bit) { > + /* Read higher 32 bits, increase 'i' to save the > + * data in the right destination register's offset. > + */ > + i++; > + bld.emit(SHADER_OPCODE_MOV_INDIRECT, > + offset(temp, bld, i), offset(src, bld, j), > + indirect_chv_high_32bit, brw_imm_ud(read_size)); > + } > + } > + > + if (is_cherryview_64bit) { > + shuffle_32bit_load_result_to_64bit_data(bld, > + dest, > + temp, > + instr->num_components); I suspect you could do this without a shuffle by subscripting the destinations directly...plus, you have byte-offsets per channel on the source data, so you can definitely have it come in however you like (unlike URB reads). Still, this should work as is, and fixes a problem, so: Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> I think it could probably be done in a simpler fashion, though. > } > } > break; >
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev