On Mon, Jul 17, 2017 at 8:14 PM, Connor Abbott <cwabbo...@gmail.com> wrote: > On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner <matts...@gmail.com> wrote: >> If we know the high bits are zero, we can just do a 32-bit comparison on >> the low bytes instead. >> --- >> src/compiler/nir/nir_opt_algebraic.py | 14 +++++++++- >> src/compiler/nir/nir_search_helpers.h | 48 >> +++++++++++++++++++++++++++++++++++ >> 2 files changed, 61 insertions(+), 1 deletion(-) >> >> diff --git a/src/compiler/nir/nir_opt_algebraic.py >> b/src/compiler/nir/nir_opt_algebraic.py >> index df5854270c..a9c3e80929 100644 >> --- a/src/compiler/nir/nir_opt_algebraic.py >> +++ b/src/compiler/nir/nir_opt_algebraic.py >> @@ -44,7 +44,7 @@ d = 'd' >> # however, be used for backend-requested lowering operations as those need >> to >> # happen regardless of precision. >> # >> -# Variable names are specified as "[#]name[@type][(cond)]" where "#" >> inicates >> +# Variable names are specified as "[#]name[@type][(cond)]" where "#" >> indicates >> # that the given variable will only match constants and the type indicates >> that >> # the given variable will only match values from ALU instructions with the >> # given output type, and (cond) specifies an additional condition function >> @@ -144,6 +144,16 @@ optimizations = [ >> (('inot', ('ieq', a, b)), ('ine', a, b)), >> (('inot', ('ine', a, b)), ('ieq', a, b)), >> >> + # Unnecessary 64-bit comparisons >> + (('ieq', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ieq', >> ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), >> + (('ine', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ine', >> ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), >> + (('ilt', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ilt', >> ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), >> + (('ige', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ige', >> ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), >> + (('ult', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ult', >> ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), >> + (('uge', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('uge', >> ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))), >> + >> + (('iand', 'a@64(fits_in_32_bits)', 'b@64'), ('pack_64_2x32_split', >> ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)), 0)), >> + >> # 0.0 >= b2f(a) >> # b2f(a) <= 0.0 >> # b2f(a) == 0.0 because b2f(a) can only be 0 or 1 >> @@ -315,6 +325,8 @@ optimizations = [ >> (('pack_64_2x32_split', ('unpack_64_2x32_split_x', a), >> ('unpack_64_2x32_split_y', a)), a), >> >> + (('unpack_64_2x32_split_y', 'a(fits_in_32_bits)'), 0), >> + >> # Byte extraction >> (('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'), >> (('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), >> '!options->lower_extract_byte'), >> diff --git a/src/compiler/nir/nir_search_helpers.h >> b/src/compiler/nir/nir_search_helpers.h >> index 200f2471f8..c29ea5b9dd 100644 >> --- a/src/compiler/nir/nir_search_helpers.h >> +++ b/src/compiler/nir/nir_search_helpers.h >> @@ -115,6 +115,54 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src, >> unsigned num_components, >> } >> >> static inline bool >> +fits_in_32_bits(nir_alu_instr *instr, unsigned src, unsigned num_components, >> + const uint8_t *swizzle) >> +{ >> + if (instr->src[src].src.is_ssa && >> + instr->src[src].src.ssa->parent_instr->type == nir_instr_type_alu) { >> + nir_alu_instr *parent_instr = >> + nir_instr_as_alu(instr->src[src].src.ssa->parent_instr); >> + >> + switch (parent_instr->op) { >> + case nir_op_pack_64_2x32_split: { >> + nir_const_value *val = >> + nir_src_as_const_value(parent_instr->src[1].src); >> + >> + if (val && val->u32[0] == 0) >> + return true; > > I think you need to check all the components here, or at least all the > components that are used by parent_instr.
True. >> + break; >> + } >> + default: >> + break; >> + } >> + >> + return false; >> + } >> + >> + nir_const_value *val = nir_src_as_const_value(instr->src[src].src); >> + >> + if (!val) >> + return false; >> + >> + for (unsigned i = 0; i < num_components; i++) { >> + switch (nir_op_infos[instr->op].input_types[src]) { >> + case nir_type_int: >> + if (val->i64[swizzle[i]] != (int)val->i64[swizzle[i]]) >> + return false; >> + break; >> + case nir_type_uint: >> + if (val->u64[swizzle[i]] != (unsigned)val->u64[swizzle[i]]) >> + return false; >> + break; >> + default: >> + return false; >> + } >> + } >> + >> + return true; >> +} > > This function seems to be doing two subtly different things: > > 1. If the instruction defining the source is nir_op_pack_64_2x32_split > or the source type is nir_type_uint, then it checks if the high 32 > bits are 0. > 2. Otherwise, it checks that the value can be replaced by a 32-bit > value that's sign extended to 64 bits, that is, whether the high 32 > bits are the same as bit 31. > > The problem is that if the source type is nir_type_int, then this > function checks two different things depending on whether the source > is a constant or the result of nir_op_pack_64_2x32_split. Based on how > you're using it, it seems like you really want to check whether the > high bits are the same as bit 31 in this case, so need to bail out of > the first 'if' if the source type isn't nir_type_uint. You are right. I gave this a try, and it breaks the optimization, because the cases I'm looking to optimize are (ine, 0, pack_64_2x32(ballot(...), 0)). Since we use "ine", so the type comes back as nir_type_int. In that case I have case nir_type_int: if (!hi_bits || !lo_bits || hi_bits->i32[i] != (lo_bits->i32[i] >> 31)) return false; break; but lo_bits is not const, so it fails. Any suggestions? I don't think I really had any reason for the sign-extending case, FWIW. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev