Re: [Mesa-dev] [PATCH 2/2] i965/fs: indirect addressing with doubles is not supported in CHV/BSW
On 17/06/16 08:57, Kenneth Graunke wrote: > On Wednesday, June 15, 2016 9:25:45 AM PDT Samuel Iglesias Gonsálvez wrote: >> From the Cherryview's PRM, Volume 7, 3D Media GPGPU Engine, Register Region >> Restrictions, page 844: >> >> "When source or destination datatype is 64b or operation is integer DWord >>multiply, indirect addressing must not be used." >> >> Signed-off-by: Samuel Iglesias Gonsálvez>> Cc: "12.0" >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462 >> --- >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 >> +--- >> 1 file changed, 42 insertions(+), 4 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index c271e64..be162ff 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -3602,6 +3602,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , >> nir_intrinsic_instr *instr >>} else { >> fs_reg indirect = retype(get_nir_src(instr->src[0]), >>BRW_REGISTER_TYPE_UD); >> + int num_components = instr->num_components; >> >> /* We need to pass a size to the MOV_INDIRECT but we don't want it >> to >>* go past the end of the uniform. In order to keep the n'th >> @@ -3609,14 +3610,51 @@ fs_visitor::nir_emit_intrinsic(const fs_builder >> , nir_intrinsic_instr *instr >>* one component of the vector. >>*/ >> assert(instr->const_index[1] >= >> -instr->num_components * (int) type_sz(dest.type)); >> +num_components * (int) type_sz(dest.type)); >> + >> unsigned read_size = instr->const_index[1] - >> -(instr->num_components - 1) * type_sz(dest.type); >> +(num_components - 1) * type_sz(dest.type); >> + >> + fs_reg temp = dest; >> + fs_reg indirect_chv_high_32bit; >> + bool is_cherryview_64bit = >> +devinfo->is_cherryview && type_sz(dest.type) == 8; >> + if (is_cherryview_64bit) { >> +/* Duplicate the number of components because we will read >> + * each 64-bit component with two 32-bit reads. >> + */ >> +num_components *= 2; >> +/* Temporary destination to save the data. We will do a shuffle >> + * later. >> + */ >> +temp = bld.vgrf(BRW_REGISTER_TYPE_UD, num_components); > > I think if you just used inst->num_components * 2 here... > >> +indirect_chv_high_32bit = vgrf(glsl_type::uint_type); >> +/* Calculate indirect address to read high 32 bits */ >> +bld.ADD(indirect_chv_high_32bit, indirect, brw_imm_ud(4)); >> + } >> >> - for (unsigned j = 0; j < instr->num_components; j++) { >> + int i, j; >> + for (j = 0, i = 0; i < num_components; j++, i++) { > > and then left it as j < instr->num_components rather than i... > then you wouldn't have to make a num_components temporary nor > multiply it by 2. Might be a little simpler? > Yes, it is simpler. I am going to do it. >> bld.emit(SHADER_OPCODE_MOV_INDIRECT, >> - offset(dest, bld, j), offset(src, bld, j), >> + offset(temp, bld, i), offset(src, bld, j), >> indirect, brw_imm_ud(read_size)); >> + >> +if (is_cherryview_64bit) { >> + /* Read higher 32 bits, increase 'i' to save the >> +* data in the right destination register's offset. >> +*/ >> + i++; >> + bld.emit(SHADER_OPCODE_MOV_INDIRECT, >> +offset(temp, bld, i), offset(src, bld, j), >> +indirect_chv_high_32bit, brw_imm_ud(read_size)); >> +} >> + } >> + >> + if (is_cherryview_64bit) { >> +shuffle_32bit_load_result_to_64bit_data(bld, >> +dest, >> +temp, >> +instr->num_components); > > I suspect you could do this without a shuffle by subscripting the > destinations directly...plus, you have byte-offsets per channel on the > source data, so you can definitely have it come in however you like > (unlike URB reads). > Right. I am going to test this solution. If it works, I will send a patch implementing it. > Still, this should work as is, and fixes a problem, so: > Reviewed-by: Kenneth Graunke > > I think it could probably be done in a simpler fashion, though. > OK, thanks. Sam >> } >>} >>break; >> > signature.asc Description: OpenPGP digital signature ___ mesa-dev mailing list
Re: [Mesa-dev] [PATCH 2/2] i965/fs: indirect addressing with doubles is not supported in CHV/BSW
On Wednesday, June 15, 2016 9:25:45 AM PDT Samuel Iglesias Gonsálvez wrote: > From the Cherryview's PRM, Volume 7, 3D Media GPGPU Engine, Register Region > Restrictions, page 844: > > "When source or destination datatype is 64b or operation is integer DWord >multiply, indirect addressing must not be used." > > Signed-off-by: Samuel Iglesias Gonsálvez> Cc: "12.0" > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462 > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 > +--- > 1 file changed, 42 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index c271e64..be162ff 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -3602,6 +3602,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , > nir_intrinsic_instr *instr >} else { > fs_reg indirect = retype(get_nir_src(instr->src[0]), >BRW_REGISTER_TYPE_UD); > + int num_components = instr->num_components; > > /* We need to pass a size to the MOV_INDIRECT but we don't want it > to >* go past the end of the uniform. In order to keep the n'th > @@ -3609,14 +3610,51 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , > nir_intrinsic_instr *instr >* one component of the vector. >*/ > assert(instr->const_index[1] >= > -instr->num_components * (int) type_sz(dest.type)); > +num_components * (int) type_sz(dest.type)); > + > unsigned read_size = instr->const_index[1] - > -(instr->num_components - 1) * type_sz(dest.type); > +(num_components - 1) * type_sz(dest.type); > + > + fs_reg temp = dest; > + fs_reg indirect_chv_high_32bit; > + bool is_cherryview_64bit = > +devinfo->is_cherryview && type_sz(dest.type) == 8; > + if (is_cherryview_64bit) { > +/* Duplicate the number of components because we will read > + * each 64-bit component with two 32-bit reads. > + */ > +num_components *= 2; > +/* Temporary destination to save the data. We will do a shuffle > + * later. > + */ > +temp = bld.vgrf(BRW_REGISTER_TYPE_UD, num_components); I think if you just used inst->num_components * 2 here... > +indirect_chv_high_32bit = vgrf(glsl_type::uint_type); > +/* Calculate indirect address to read high 32 bits */ > +bld.ADD(indirect_chv_high_32bit, indirect, brw_imm_ud(4)); > + } > > - for (unsigned j = 0; j < instr->num_components; j++) { > + int i, j; > + for (j = 0, i = 0; i < num_components; j++, i++) { and then left it as j < instr->num_components rather than i... then you wouldn't have to make a num_components temporary nor multiply it by 2. Might be a little simpler? > bld.emit(SHADER_OPCODE_MOV_INDIRECT, > - offset(dest, bld, j), offset(src, bld, j), > + offset(temp, bld, i), offset(src, bld, j), > indirect, brw_imm_ud(read_size)); > + > +if (is_cherryview_64bit) { > + /* Read higher 32 bits, increase 'i' to save the > +* data in the right destination register's offset. > +*/ > + i++; > + bld.emit(SHADER_OPCODE_MOV_INDIRECT, > +offset(temp, bld, i), offset(src, bld, j), > +indirect_chv_high_32bit, brw_imm_ud(read_size)); > +} > + } > + > + if (is_cherryview_64bit) { > +shuffle_32bit_load_result_to_64bit_data(bld, > +dest, > +temp, > +instr->num_components); I suspect you could do this without a shuffle by subscripting the destinations directly...plus, you have byte-offsets per channel on the source data, so you can definitely have it come in however you like (unlike URB reads). Still, this should work as is, and fixes a problem, so: Reviewed-by: Kenneth Graunke I think it could probably be done in a simpler fashion, though. > } >} >break; > signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965/fs: indirect addressing with doubles is not supported in CHV/BSW
From the Cherryview's PRM, Volume 7, 3D Media GPGPU Engine, Register Region Restrictions, page 844: "When source or destination datatype is 64b or operation is integer DWord multiply, indirect addressing must not be used." Signed-off-by: Samuel Iglesias GonsálvezCc: "12.0" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462 --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 +--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c271e64..be162ff 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -3602,6 +3602,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , nir_intrinsic_instr *instr } else { fs_reg indirect = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD); + int num_components = instr->num_components; /* We need to pass a size to the MOV_INDIRECT but we don't want it to * go past the end of the uniform. In order to keep the n'th @@ -3609,14 +3610,51 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , nir_intrinsic_instr *instr * one component of the vector. */ assert(instr->const_index[1] >= -instr->num_components * (int) type_sz(dest.type)); +num_components * (int) type_sz(dest.type)); + unsigned read_size = instr->const_index[1] - -(instr->num_components - 1) * type_sz(dest.type); +(num_components - 1) * type_sz(dest.type); + + fs_reg temp = dest; + fs_reg indirect_chv_high_32bit; + bool is_cherryview_64bit = +devinfo->is_cherryview && type_sz(dest.type) == 8; + if (is_cherryview_64bit) { +/* Duplicate the number of components because we will read + * each 64-bit component with two 32-bit reads. + */ +num_components *= 2; +/* Temporary destination to save the data. We will do a shuffle + * later. + */ +temp = bld.vgrf(BRW_REGISTER_TYPE_UD, num_components); +indirect_chv_high_32bit = vgrf(glsl_type::uint_type); +/* Calculate indirect address to read high 32 bits */ +bld.ADD(indirect_chv_high_32bit, indirect, brw_imm_ud(4)); + } - for (unsigned j = 0; j < instr->num_components; j++) { + int i, j; + for (j = 0, i = 0; i < num_components; j++, i++) { bld.emit(SHADER_OPCODE_MOV_INDIRECT, - offset(dest, bld, j), offset(src, bld, j), + offset(temp, bld, i), offset(src, bld, j), indirect, brw_imm_ud(read_size)); + +if (is_cherryview_64bit) { + /* Read higher 32 bits, increase 'i' to save the +* data in the right destination register's offset. +*/ + i++; + bld.emit(SHADER_OPCODE_MOV_INDIRECT, +offset(temp, bld, i), offset(src, bld, j), +indirect_chv_high_32bit, brw_imm_ud(read_size)); +} + } + + if (is_cherryview_64bit) { +shuffle_32bit_load_result_to_64bit_data(bld, +dest, +temp, +instr->num_components); } } break; -- 2.8.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev