On Wed, 27 May 2020, Hao Liu OS wrote: > Hi all, > > Previously, the fix for > PR89430<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=89430> was > reverted by PR94734<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=94734> > due to a bug. The root cause is a missing non-trapping check against the > dominating LOAD/STORE. > > This patch extends the cselim non-trapping check to support ARRAY_REF > and COMPONENT_REF (previously only MEM_REF was supported) by using > get_inner_reference and hashing, according to the comments from Jakub. > > To support the cases in PR89430: if there is a dominating LOAD of a local > variable whose address does not escape, then — as the local stack is always > writable — the STORE cannot trap and can be optimized. > > Please review.
How did you test the patch? There's a ChangeLog missing as well as a testcase or testcase adjustments in case we XFAILed the old ones. It helps to post patches created by git format-patch. First comments inline... > -------------------- > diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c > index b1e0dce93d8..3733780a0bc 100644 > --- a/gcc/tree-ssa-phiopt.c > +++ b/gcc/tree-ssa-phiopt.c > @@ -1986,26 +1986,31 @@ abs_replacement (basic_block cond_bb, basic_block > middle_bb, > > ??? We currently are very conservative and assume that a load might > trap even if a store doesn't (write-only memory). This probably is > - overly conservative. */ > + overly conservative. > > -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF > - through it was seen, which would constitute a no-trap region for > - same accesses. */ > -struct name_to_bb > + We currently support a special case that for !TREE_ADDRESSABLE automatic > + variables, it could ignore whether something is a load or store because > the > + local stack should be always writable. */ > + > +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which > + basic block an *_REF through it was seen, which would constitute a > + no-trap region for same accesses. */ > +struct ref_to_bb > { > - unsigned int ssa_name_ver; > + tree base; > + poly_int64 bitsize, bitpos; > + tree offset; > unsigned int phase; > - bool store; > - HOST_WIDE_INT offset, size; > + bool writable; > basic_block bb; > }; > > /* Hashtable helpers. */ > > -struct ssa_names_hasher : free_ptr_hash <name_to_bb> > +struct refs_hasher : free_ptr_hash<ref_to_bb> > { > - static inline hashval_t hash (const name_to_bb *); > - static inline bool equal (const name_to_bb *, const name_to_bb *); > + static inline hashval_t hash (const ref_to_bb *); > + static inline bool equal (const ref_to_bb *, const ref_to_bb *); > }; > > /* Used for quick clearing of the hash-table when we see calls. 
> @@ -2015,28 +2020,44 @@ static unsigned int nt_call_phase; > /* The hash function. */ > > inline hashval_t > -ssa_names_hasher::hash (const name_to_bb *n) > +refs_hasher::hash (const ref_to_bb *n) > { > - return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) > - ^ (n->offset << 6) ^ (n->size << 3); > + inchash::hash hstate; > + inchash::add_expr (n->base, hstate); > + hstate.add_poly_int (n->bitsize); > + hstate.add_poly_int (n->bitpos); > + hstate.add_int (n->writable); > + if (n->offset != NULL_TREE) > + { > + inchash::add_expr (n->offset, hstate); > + } extra {} > + return hstate.end (); > } > > /* The equality function of *P1 and *P2. */ > > inline bool > -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) > +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) > { > - return n1->ssa_name_ver == n2->ssa_name_ver > - && n1->store == n2->store > - && n1->offset == n2->offset > - && n1->size == n2->size; > + if (operand_equal_p (n1->base, n2->base, 0) > + && known_eq (n1->bitsize, n2->bitsize) > + && known_eq (n1->bitpos, n2->bitpos) && n1->writable == n2->writable) > + { > + /* Should not call operand_equal_p with NULL_TREE. */ > + if (n1->offset == NULL_TREE || n2->offset == NULL_TREE) > + return n1->offset == n2->offset; indents are off. You should be using a tab-stop of 8 characters. > + else > + return operand_equal_p (n1->offset, n2->offset, 0); > + } > + return false; > } > > class nontrapping_dom_walker : public dom_walker > { > public: > nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps) > - : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} > + : dom_walker (direction), m_nontrapping (ps), m_seen_refs (256) > + {} > > virtual edge before_dom_children (basic_block); > virtual void after_dom_children (basic_block); > @@ -2053,7 +2074,7 @@ private: > hash_set<tree> *m_nontrapping; > > /* The hash table for remembering what we've seen. 
*/ > - hash_table<ssa_names_hasher> m_seen_ssa_names; > + hash_table<refs_hasher> m_seen_refs; > }; > > /* Called by walk_dominator_tree, when entering the block BB. */ > @@ -2102,65 +2123,76 @@ nontrapping_dom_walker::after_dom_children > (basic_block bb) > } > > /* We see the expression EXP in basic block BB. If it's an interesting > - expression (an MEM_REF through an SSA_NAME) possibly insert the > - expression into the set NONTRAP or the hash table of seen expressions. > - STORE is true if this expression is on the LHS, otherwise it's on > - the RHS. */ > + expression of: > + 1) MEM_REF > + 2) ARRAY_REF > + 3) COMPONENT_REF > + possibly insert the expression into the set NONTRAP or the hash table > + of seen expressions. STORE is true if this expression is on the LHS, > + otherwise it's on the RHS. */ > void > nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool > store) > { > - HOST_WIDE_INT size; > - > - if (TREE_CODE (exp) == MEM_REF > - && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME > - && tree_fits_shwi_p (TREE_OPERAND (exp, 1)) > - && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) > + if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF > + || TREE_CODE (exp) == COMPONENT_REF) > { > - tree name = TREE_OPERAND (exp, 0); > - struct name_to_bb map; > - name_to_bb **slot; > - struct name_to_bb *n2bb; > + struct ref_to_bb map; > + ref_to_bb **slot; > + struct ref_to_bb *r2bb; > basic_block found_bb = 0; > > - /* Try to find the last seen MEM_REF through the same > - SSA_NAME, which can trap. 
*/ > - map.ssa_name_ver = SSA_NAME_VERSION (name); > - map.phase = 0; > - map.bb = 0; > - map.store = store; > - map.offset = tree_to_shwi (TREE_OPERAND (exp, 1)); > - map.size = size; > - > - slot = m_seen_ssa_names.find_slot (&map, INSERT); > - n2bb = *slot; > - if (n2bb && n2bb->phase >= nt_call_phase) > - found_bb = n2bb->bb; > - > - /* If we've found a trapping MEM_REF, _and_ it dominates EXP > - (it's in a basic block on the path from us to the dominator root) > + /* Try to find the last seen *_REF (through the same base expression > + + bitsize + bitpos + offset expression), which can trap. */ > + machine_mode nmode; > + poly_int64 bitsize, bitpos; > + tree offset; > + int nunsignedp, nreversep, nvolatilep = 0; > + tree base = get_inner_reference (exp, &bitsize, &bitpos, &offset, > &nmode, > + &nunsignedp, &nreversep, > &nvolatilep); > + gcc_assert (base != NULL_TREE); So my main concern is that get_inner_reference is quite expensive since it builds the variable offset part as new GENERIC trees. I think it would be easier to fully rely on operand_equal_p here and thus record the original full expression. For the writable flag you can use the way cheaper get_base_address () which gets you 'base' without the overhead of building the offset tree. That means you merely exchange ssa_name_ver in the struct with 'exp'. Thanks, Richard. > + map.base = base; > + map.offset = offset; > + map.bitsize = bitsize; > + map.bitpos = bitpos; > + > + /* The reference is writable if EXP is a: > + 1) STORE, or > + 2) LOAD of a local variable without address-taken (as the local > stack > + is always writable). This allows cselim on a STORE with a > dominating > + LOAD. 
*/ > + map.writable = store || (auto_var_p (base) && !TREE_ADDRESSABLE > (base)); > + > + slot = m_seen_refs.find_slot (&map, INSERT); > + r2bb = *slot; > + if (r2bb && r2bb->phase >= nt_call_phase) > + found_bb = r2bb->bb; > + > + /* If we've found a trapping *_REF, _and_ it dominates EXP > + (it's in a basic block on the path from us to the dominator root) > then we can't trap. */ > - if (found_bb && (((size_t)found_bb->aux) & 1) == 1) > + if (found_bb && (((size_t) found_bb->aux) & 1) == 1) > { > m_nontrapping->add (exp); > } > else > - { > + { > /* EXP might trap, so insert it into the hash table. */ > - if (n2bb) > + if (r2bb) > { > - n2bb->phase = nt_call_phase; > - n2bb->bb = bb; > + r2bb->phase = nt_call_phase; > + r2bb->bb = bb; > } > else > { > - n2bb = XNEW (struct name_to_bb); > - n2bb->ssa_name_ver = SSA_NAME_VERSION (name); > - n2bb->phase = nt_call_phase; > - n2bb->bb = bb; > - n2bb->store = store; > - n2bb->offset = map.offset; > - n2bb->size = size; > - *slot = n2bb; > + r2bb = XNEW (struct ref_to_bb); > + r2bb->phase = nt_call_phase; > + r2bb->bb = bb; > + r2bb->base = base; > + r2bb->offset = map.offset; > + r2bb->bitsize = map.bitsize; > + r2bb->bitpos = map.bitpos; > + r2bb->writable = map.writable; > + *slot = r2bb; > } > } > } > > > Thanks, > -Hao > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)