Hi, generally speaking, the ipa-cp.c and ipa-cp.[hc] bits look reasonable, but I have a few comments:
On Thu, Aug 04, 2016 at 12:06:18PM +0530, Prathamesh Kulkarni wrote: > Hi, > This is a prototype patch for propagating known/unknown bits > inter-procedurally. > for integral types which propagates info obtained from get_nonzero_bits (). > > Patch required making following changes: > a) To make info from get_nonzero_bits() available to ipa in IPA phase, you should not be looking at SSA_NAMEs, those will not be available with LTO when you do not have access to function bodies and thus also not to SSA_NAMES. In IPA, you should only rely on hat you have in jump functions. > , I had to remove > guard !nonzero_p in ccp_finalize. However that triggered the following ICE > in get_ptr_info() for default_none.f95 (and several other fortran tests) > with options: -fopenacc -O2 > ICE: http://pastebin.com/KjD7HMQi > I confirmed with Richard that this was a latent issue. > > > I suppose we would want to gate this on some flag, say -fipa-bit-cp ? Yes, definitely. > I haven't yet gated it on the flag, will do in next version of patch. > I have added some very simple test-cases, I will try to add more > meaningful ones. > > Patch passes bootstrap+test on x86_64-unknown-linux-gnu > and cross-tested on arm*-*-* and aarch64*-*-* with the exception > of some fortran tests failing due to above ICE. > > As next steps, I am planning to extend it to handle alignment propagation, > and do further testing (lto-bootstrap, chromium). > I would be grateful for feedback on the current patch. > > Thanks, > Prathamesh > diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c > index 5b6cb9a..b770f6a 100644 > --- a/gcc/ipa-cp.c > +++ b/gcc/ipa-cp.c > @@ -120,6 +120,7 @@ along with GCC; see the file COPYING3. If not see > #include "params.h" > #include "ipa-inline.h" > #include "ipa-utils.h" > +#include "tree-ssa-ccp.h" > > template <typename valtype> class ipcp_value; > > @@ -266,6 +267,40 @@ private: > bool meet_with_1 (unsigned new_align, unsigned new_misalign); > }; > > +/* Lattice of known bits, only capable of holding one value. > + Similar to ccp_prop_value_t, mask represents which bits of value are > constant. > + If a bit in mask is set to 0, then the corresponding bit in > + value is known to be constant. */ > + > +class ipcp_bits_lattice > +{ > +public: > + bool bottom_p () { return lattice_val == IPA_BITS_VARYING; } > + bool top_p () { return lattice_val == IPA_BITS_UNDEFINED; } > + bool constant_p () { return lattice_val == IPA_BITS_CONSTANT; } > + bool set_to_bottom (); > + bool set_to_constant (widest_int, widest_int, signop, unsigned); > + > + widest_int get_value () { return value; } > + widest_int get_mask () { return mask; } > + signop get_sign () { return sgn; } > + unsigned get_precision () { return precision; } > + > + bool meet_with (ipcp_bits_lattice& other, enum tree_code, tree); > + bool meet_with (widest_int, widest_int, signop, unsigned); > + > + void print (FILE *); > + > +private: > + enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } > lattice_val; > + widest_int value, mask; > + signop sgn; > + unsigned precision; > + > + bool meet_with_1 (widest_int, widest_int); > + void get_value_and_mask (tree, widest_int *, widest_int *); > +}; > + > /* Structure containing lattices for a parameter itself and for pieces of > aggregates that are passed in the parameter or by a reference in a > parameter > plus some other useful flags. */ > @@ -281,6 +316,8 @@ public: > ipcp_agg_lattice *aggs; > /* Lattice describing known alignment. */ > ipcp_alignment_lattice alignment; > + /* Lattice describing known bits. */ > + ipcp_bits_lattice bits_lattice; > /* Number of aggregate lattices */ > int aggs_count; > /* True if aggregate data were passed by reference (as opposed to by > @@ -458,6 +495,21 @@ ipcp_alignment_lattice::print (FILE * f) > fprintf (f, " Alignment %u, misalignment %u\n", align, misalign); > } > > +void > +ipcp_bits_lattice::print (FILE *f) > +{ > + if (top_p ()) > + fprintf (f, " Bits unknown (TOP)\n"); > + else if (bottom_p ()) > + fprintf (f, " Bits unusable (BOTTOM)\n"); > + else > + { > + fprintf (f, " Bits: value = "); print_hex (get_value (), f); > + fprintf (f, ", mask = "); print_hex (get_mask (), f); > + fprintf (f, "\n"); > + } > +} > + > /* Print all ipcp_lattices of all functions to F. */ > > static void > @@ -484,6 +536,7 @@ print_all_lattices (FILE * f, bool dump_sources, bool > dump_benefits) > fprintf (f, " ctxs: "); > plats->ctxlat.print (f, dump_sources, dump_benefits); > plats->alignment.print (f); > + plats->bits_lattice.print (f); > if (plats->virt_call) > fprintf (f, " virt_call flag set\n"); > > @@ -911,6 +964,161 @@ ipcp_alignment_lattice::meet_with (const > ipcp_alignment_lattice &other, > return meet_with_1 (other.align, adjusted_misalign); > } > > +/* Set lattice value to bottom, if it already isn't the case. */ > + > +bool > +ipcp_bits_lattice::set_to_bottom () > +{ > + if (bottom_p ()) > + return false; > + lattice_val = IPA_BITS_VARYING; > + value = 0; > + mask = -1; > + return true; > +} > + > +/* Set to constant if it isn't already. Only meant to be called > + when switching state from TOP. */ > + > +bool > +ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask, > + signop sgn, unsigned precision) > +{ > + gcc_assert (top_p ()); > + this->lattice_val = IPA_BITS_CONSTANT; > + this->value = value; > + this->mask = mask; > + this->sgn = sgn; > + this->precision = precision; > + return true; > +} > + > +/* Convert operand to value, mask form. */ > + > +void > +ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, > widest_int *maskp) > +{ > + wide_int get_nonzero_bits (const_tree); > + > + if (TREE_CODE (operand) == INTEGER_CST) > + { > + *valuep = wi::to_widest (operand); > + *maskp = 0; > + } > + else if (TREE_CODE (operand) == SSA_NAME) IIUC, operand is the operand from pass-through jump function and that should never be an SSA_NAME. I have even looked at how we generate them and it seems fairly safe to say that they never are. If you have seen an SSA_NAME here, it is a bug and please let me know because sooner or later it will cause an assert. > + { > + *valuep = 0; > + *maskp = widest_int::from (get_nonzero_bits (operand), UNSIGNED); > + } > + else > + gcc_unreachable (); The operand however can be any any other is_gimple_ip_invariant tree. I assume that you could hit this gcc_unreachable only in a program with undefined behavior (or with a Fortran CONST_DECL?) but you should not ICE here. > +} > + > +/* Meet operation, similar to ccp_lattice_meet, we xor values > + if this->value, value have different values at same bit positions, we want > + to drop that bit to varying. Return true if mask is changed. > + This function assumes that the lattice value is in CONSTANT state */ > + > +bool > +ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask) > +{ > + gcc_assert (constant_p ()); > + > + widest_int old_mask = this->mask; > + this->mask = (this->mask | mask) | (this->value ^ value); > + > + if (wi::sext (this->mask, this->precision) == -1) > + return set_to_bottom (); > + > + bool changed = this->mask != old_mask; > + return changed; > +} > + > +/* Meet the bits lattice with operand > + described by <value, mask, sgn, precision. */ > + > +bool > +ipcp_bits_lattice::meet_with (widest_int value, widest_int mask, > + signop sgn, unsigned precision) > +{ > + if (bottom_p ()) > + return false; > + > + if (top_p ()) > + { > + if (wi::sext (mask, precision) == -1) > + return set_to_bottom (); > + return set_to_constant (value, mask, sgn, precision); > + } > + > + return meet_with_1 (value, mask); What if precisions do not match? > +} > + > +/* Meet bits lattice with the result of bit_value_binop_1 (other, operand) > + if code is binary operation or bit_value_unop_1 (other) if code is unary > op. > + In the case when code is nop_expr, no adjustment is required. */ > + > +bool > +ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, enum tree_code code, > tree operand) > +{ > + if (other.bottom_p ()) > + return set_to_bottom (); > + > + if (bottom_p () || other.top_p ()) > + return false; > + > + widest_int adjusted_value, adjusted_mask; > + > + if (TREE_CODE_CLASS (code) == tcc_binary) > + { > + tree type = TREE_TYPE (operand); > + gcc_assert (INTEGRAL_TYPE_P (type)); > + widest_int o_value, o_mask; > + get_value_and_mask (operand, &o_value, &o_mask); > + > + signop sgn = other.get_sign (); > + unsigned prec = other.get_precision (); > + > + bit_value_binop_1 (code, sgn, prec, &adjusted_value, &adjusted_mask, > + sgn, prec, other.get_value (), other.get_mask (), > + TYPE_SIGN (type), TYPE_PRECISION (type), o_value, > o_mask); It is probably just me not being particularly sharp on a Friday afternoon and I might not understand the semantics of mask well (also, you did not document it :-), but... assume that we are looking at a binary and operation, other comes from an SSA pointer and its mask would be binary 100 and its value 0 because that's what you set for ssa names in ipa-prop.h, and the operand is binary value 101, which means that get_value_and_mask returns mask 0 and value 101. Now, bit_value_binop_1 would return value 0 & 101 = 0 and mask according to (m1 | m2) & ((v1 | m1) & (v2 | m2)) so in our case (100b & 0) & ((0 | 100b) & (101b | 0)) = 0 & 100b = 0. So both value and mask would be zero, which, if this was the only incoming value to the lattice, I am afraid you would later on in ipcp_update_bits interpret as that no bits can be non-zero. Yet the third bit clearly could. I think that this is the only place where you interpret mask as a mask and actually use it for and-ing, everywhere else you interpret it basically only as the result of get_nonzero_bits and use it for or-ing. > + > + if (wi::sext (adjusted_mask, prec) == -1) > + return set_to_bottom (); > + } > + > + else if (TREE_CODE_CLASS (code) == tcc_unary) > + { > + signop sgn = other.get_sign (); > + unsigned prec = other.get_precision (); > + > + bit_value_unop_1 (code, sgn, prec, &adjusted_value, > + &adjusted_mask, sgn, prec, other.get_value (), > + other.get_mask ()); > + > + if (wi::sext (adjusted_mask, prec) == -1) > + return set_to_bottom (); > + } > + > + else if (code == NOP_EXPR) > + { > + adjusted_value = other.value; > + adjusted_mask = other.mask; > + } > + > + else > + return set_to_bottom (); > + > + if (top_p ()) > + { > + if (wi::sext (adjusted_mask, other.get_precision ()) == -1) > + return set_to_bottom (); > + return set_to_constant (adjusted_value, adjusted_mask, other.get_sign > (), other.get_precision ()); > + } > + else > + return meet_with_1 (adjusted_value, adjusted_mask); Again, What if precisions do not match? > +} > + > /* Mark bot aggregate and scalar lattices as containing an unknown variable, > return true is any of them has not been marked as such so far. */ > > @@ -922,6 +1130,7 @@ set_all_contains_variable (struct ipcp_param_lattices > *plats) > ret |= plats->ctxlat.set_contains_variable (); > ret |= set_agg_lats_contain_variable (plats); > ret |= plats->alignment.set_to_bottom (); > + ret |= plats->bits_lattice.set_to_bottom (); > return ret; > } > > @@ -1003,6 +1212,7 @@ initialize_node_lattices (struct cgraph_node *node) > plats->ctxlat.set_to_bottom (); > set_agg_lats_to_bottom (plats); > plats->alignment.set_to_bottom (); > + plats->bits_lattice.set_to_bottom (); > } > else > set_all_contains_variable (plats); > @@ -1621,6 +1831,57 @@ propagate_alignment_accross_jump_function (cgraph_edge > *cs, > } > } > > +/* Propagate bits across jfunc that is associated with > + edge cs and update dest_lattice accordingly. */ > + > +bool > +propagate_bits_accross_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc, > + ipcp_bits_lattice *dest_lattice) > +{ > + if (dest_lattice->bottom_p ()) > + return false; > + > + if (jfunc->type == IPA_JF_PASS_THROUGH) > + { > + struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); > + enum tree_code code = ipa_get_jf_pass_through_operation (jfunc); > + tree operand = NULL_TREE; > + > + if (code != NOP_EXPR) > + operand = ipa_get_jf_pass_through_operand (jfunc); > + > + int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); > + struct ipcp_param_lattices *src_lats > + = ipa_get_parm_lattices (caller_info, src_idx); > + > + /* Try to proapgate bits if src_lattice is bottom, but jfunc is known. propagate > + for eg consider: > + int f(int x) > + { > + g (x & 0xff); > + } > + Assume lattice for x is bottom, however we can still propagate > + result of x & 0xff == 0xff, which gets computed during ccp1 pass > + and we store it in jump function during analysis stage. */ > + > + if (src_lats->bits_lattice.bottom_p () > + && jfunc->bits.known) > + return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask, > + jfunc->bits.sgn, jfunc->bits.precision); > + else > + return dest_lattice->meet_with (src_lats->bits_lattice, code, operand); > + } > + > + else if (jfunc->type == IPA_JF_ANCESTOR) > + return dest_lattice->set_to_bottom (); > + > + else if (jfunc->bits.known) > + return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask, > + jfunc->bits.sgn, jfunc->bits.precision); > + else > + return dest_lattice->set_to_bottom (); > +} > + ... > diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h > index e32d078..d69a071 100644 > --- a/gcc/ipa-prop.h > +++ b/gcc/ipa-prop.h > @@ -154,6 +154,16 @@ struct GTY(()) ipa_alignment > unsigned misalign; > }; > > +/* Information about zero/non-zero bits. */ > +struct GTY(()) ipa_bits > +{ > + bool known; > + widest_int value; > + widest_int mask; > + enum signop sgn; > + unsigned precision; > +}; Please order the fields according to their size, the largest first and add a comment describing each one of them. As I explained above, it is not immediately clear why the mask would be a mask, for example. Nevertheless, thanks for looking into this. It would be nice to have this for pointers too, not least because we could represent non-NULL-ness this way, which could be very interesting for cloning and inlining. But those are further steps, once normal propagation works. Martin