On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner <matts...@gmail.com> wrote: > If we know the high bits are zero, we can just do a 32-bit comparison on > the low bytes instead. > --- > src/compiler/nir/nir_opt_algebraic.py | 14 +++++++++- > src/compiler/nir/nir_search_helpers.h | 48 > +++++++++++++++++++++++++++++++++++ > 2 files changed, 61 insertions(+), 1 deletion(-) > > diff --git a/src/compiler/nir/nir_opt_algebraic.py > b/src/compiler/nir/nir_opt_algebraic.py > index df5854270c..a9c3e80929 100644 > --- a/src/compiler/nir/nir_opt_algebraic.py > +++ b/src/compiler/nir/nir_opt_algebraic.py > @@ -44,7 +44,7 @@ d = 'd' > # however, be used for backend-requested lowering operations as those need to > # happen regardless of precision. > # > -# Variable names are specified as "[#]name[@type][(cond)]" where "#" inicates > +# Variable names are specified as "[#]name[@type][(cond)]" where "#" > indicates > # that the given variable will only match constants and the type indicates > that > # the given variable will only match values from ALU instructions with the > # given output type, and (cond) specifies an additional condition function > @@ -144,6 +144,16 @@ optimizations = [ > (('inot', ('ieq', a, b)), ('ine', a, b)), > (('inot', ('ine', a, b)), ('ieq', a, b)), > > + # Unnecessary 64-bit comparisons > + (('ieq', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ieq', > ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), > + (('ine', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ine', > ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), > + (('ilt', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ilt', > ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), > + (('ige', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ige', > ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), > + (('ult', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ult', > ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), > + (('uge', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('uge', > ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), > + > + (('iand', 'a@64(fits_in_32_bits)', 'b@64'), ('pack_64_2x32_split', > ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)), 0)), > + > # 0.0 >= b2f(a) > # b2f(a) <= 0.0 > # b2f(a) == 0.0 because b2f(a) can only be 0 or 1 > @@ -315,6 +325,8 @@ optimizations = [ > (('pack_64_2x32_split', ('unpack_64_2x32_split_x', a), > ('unpack_64_2x32_split_y', a)), a), > > + (('unpack_64_2x32_split_y', 'a(fits_in_32_bits)'), 0), > + > # Byte extraction > (('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'), > (('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), > '!options->lower_extract_byte'), > diff --git a/src/compiler/nir/nir_search_helpers.h > b/src/compiler/nir/nir_search_helpers.h > index 200f2471f8..c29ea5b9dd 100644 > --- a/src/compiler/nir/nir_search_helpers.h > +++ b/src/compiler/nir/nir_search_helpers.h > @@ -115,6 +115,54 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src, > unsigned num_components, > } > > static inline bool > +fits_in_32_bits(nir_alu_instr *instr, unsigned src, unsigned num_components, > + const uint8_t *swizzle) > +{ > + if (instr->src[src].src.is_ssa && > + instr->src[src].src.ssa->parent_instr->type == nir_instr_type_alu) { > + nir_alu_instr *parent_instr = > + nir_instr_as_alu(instr->src[src].src.ssa->parent_instr); > + > + switch (parent_instr->op) { > + case nir_op_pack_64_2x32_split: { > + nir_const_value *val = > + nir_src_as_const_value(parent_instr->src[1].src); > + > + if (val && val->u32[0] == 0) > + return true; > + break; > + } > + default: > + break; > + } > + > + return false; > + } > + > + nir_const_value *val = nir_src_as_const_value(instr->src[src].src); > + > + if (!val) > + return false; > + > + for (unsigned i = 0; i < num_components; i++) { > + switch (nir_op_infos[instr->op].input_types[src]) { > + case nir_type_int: > + if (val->i64[swizzle[i]] != (int)val->i64[swizzle[i]])
Oh, also... using int32_t and uint32_t here might be a little more clear, since the exact bitsize matters. > + return false; > + break; > + case nir_type_uint: > + if (val->u64[swizzle[i]] != (unsigned)val->u64[swizzle[i]]) > + return false; > + break; > + default: > + return false; > + } > + } > + > + return true; > +} > + > +static inline bool > is_not_const(nir_alu_instr *instr, unsigned src, unsigned num_components, > const uint8_t *swizzle) > { > -- > 2.13.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev