Ian Romanick <i...@freedesktop.org> writes: > From: Ian Romanick <ian.d.roman...@intel.com> > > Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 ++++++++++++++++++++++------ > src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 52 > +++++++++++++++++++++++------- > 2 files changed, 81 insertions(+), 21 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index f15bf3e..f8db28a 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -623,8 +623,32 @@ fs_visitor::nir_emit_find_msb_using_lzd(const fs_builder > &bld, > bool is_signed) > { > fs_inst *inst; > + fs_reg temp = src; > > - bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src); > + if (is_signed) { > + /* LZD of an absolute value source almost always does the right > + * thing. There are two problem values: > + *
This comment seems somewhat misleading: there are many more problem values if you attempt to just take the LZD of the absolute value of the argument. Take the negative of any power of two, e.g. -8, represented as 0xfffffff8. findMSB(-8) is supposed to give you 2 as the result (for a negative input, findMSB reports the position of the most significant 0 bit), but '31 - lzd(abs(-8)) == 31 - 28 == 3'. > + * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns > + * 0. However, findMSB(int(0x80000000)) == 30. > + * > + * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns > + * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: > + * > + * For a value of zero or negative one, -1 will be returned. > + * > + * For all negative number cases, including 0x80000000 and > + * 0xffffffff, the correct value is obtained from LZD if instead of > + * negating the (already negative) value the logical-not is used. A > + * conditonal logical-not can be achieved in two instructions. > + */ > + temp = vgrf(glsl_type::int_type); > + > + bld.ASR(temp, src, brw_imm_d(31)); > + bld.XOR(temp, temp, src); > + } > + > + bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), temp); > > /* LZD counts from the MSB side, while GLSL's findMSB() wants the count > * from the LSB side. Subtract the result from 31 to convert the MSB > @@ -1339,17 +1363,23 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, > nir_alu_instr *instr) > > case nir_op_ifind_msb: { > assert(nir_dest_bit_size(instr->dest.dest) < 64); > - bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); > > - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count > - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then > - * subtract the result from 31 to convert the MSB count into an LSB > count. 
> - */ > - bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); > + if (devinfo->gen < 7) { > + nir_emit_find_msb_using_lzd(bld, result, op[0], true); > + } else { > + bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); > > - inst = bld.ADD(result, result, brw_imm_d(31)); > - inst->predicate = BRW_PREDICATE_NORMAL; > - inst->src[0].negate = true; > + /* FBH counts from the MSB side, while GLSL's findMSB() wants the > + * count from the LSB side. If FBH didn't return an error > + * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB > + * count into an LSB count. > + */ > + bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), > BRW_CONDITIONAL_NZ); > + > + inst = bld.ADD(result, result, brw_imm_d(31)); > + inst->predicate = BRW_PREDICATE_NORMAL; > + inst->src[0].negate = true; > + } > break; > } > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > index cd88fb6..2fc2cf2 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > @@ -999,8 +999,32 @@ vec4_visitor::nir_emit_find_msb_using_lzd(const dst_reg > &dst, > bool is_signed) > { > vec4_instruction *inst; > + src_reg temp = src; > > - emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), src); > + if (is_signed) { > + /* LZD of an absolute value source almost always does the right > + * thing. There are two problem values: > + * > + * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns > + * 0. However, findMSB(int(0x80000000)) == 30. > + * > + * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns > + * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: > + * > + * For a value of zero or negative one, -1 will be returned. > + * > + * For all negative number cases, including 0x80000000 and > + * 0xffffffff, the correct value is obtained from LZD if instead of > + * negating the (already negative) value the logical-not is used. 
A > + * conditonal logical-not can be achieved in two instructions. > + */ > + temp = src_reg(this, glsl_type::ivec4_type); > + > + emit(BRW_OPCODE_ASR, dst_reg(temp), src, brw_imm_d(31)); > + emit(BRW_OPCODE_XOR, dst_reg(temp), temp, src); > + } > + > + emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), temp); > > /* LZD counts from the MSB side, while GLSL's findMSB() wants the count > * from the LSB side. Subtract the result from 31 to convert the MSB count > @@ -1484,18 +1508,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) > break; > > case nir_op_ifind_msb: { > - emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); > - > - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count > - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then > - * subtract the result from 31 to convert the MSB count into an LSB > count. > - */ > src_reg src(dst); > - emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ)); > > - inst = emit(ADD(dst, src, brw_imm_d(31))); > - inst->predicate = BRW_PREDICATE_NORMAL; > - inst->src[0].negate = true; > + if (devinfo->gen < 7) { > + nir_emit_find_msb_using_lzd(dst, op[0], true); > + } else { > + emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); > + > + /* FBH counts from the MSB side, while GLSL's findMSB() wants the > + * count from the LSB side. If FBH didn't return an error > + * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB > + * count into an LSB count. > + */ > + emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ)); > + > + inst = emit(ADD(dst, src, brw_imm_d(31))); > + inst->predicate = BRW_PREDICATE_NORMAL; > + inst->src[0].negate = true; > + } > break; > } > > -- > 2.5.5 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev