On Wed, 27 May 2020, Hao Liu OS wrote:

> Hi all,
> 
> Previously, the fix for 
> PR89430<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=89430> was 
> reverted due to a bug reported in 
> PR94734<https://gcc.gnu.org/bugzilla//show_bug.cgi?id=94734>. The root 
> cause was a missing non-trapping check against the dominating 
> LOAD/STORE.
> 
> This patch extends the cselim non-trapping check to support ARRAY_REF 
> and COMPONENT_REF (previously only MEM_REF was supported), by using 
> get_inner_reference and hashing, following the comments from Jakub.
> 
> To support the cases in PR89430: if there is a dominating LOAD of a 
> local variable whose address does not escape, then, since the local 
> stack is always writable, the STORE cannot trap and can be optimized.
> 
> Please review.

How did you test the patch?  There's a ChangeLog missing as well as
a testcase or testcase adjustments in case we XFAILed the old ones.
It helps to post patches created by git format-patch.

First comments inline...

> --------------------
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index b1e0dce93d8..3733780a0bc 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -1986,26 +1986,31 @@ abs_replacement (basic_block cond_bb, basic_block 
> middle_bb,
> 
>     ??? We currently are very conservative and assume that a load might
>     trap even if a store doesn't (write-only memory).  This probably is
> -   overly conservative.  */
> +   overly conservative.
> 
> -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
> -   through it was seen, which would constitute a no-trap region for
> -   same accesses.  */
> -struct name_to_bb
> +   We currently support a special case that for !TREE_ADDRESSABLE automatic
> +   variables, it could ignore whether something is a load or store because 
> the
> +   local stack should be always writable.  */
> +
> +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
> +   basic block an *_REF through it was seen, which would constitute a
> +   no-trap region for same accesses.  */
> +struct ref_to_bb
>  {
> -  unsigned int ssa_name_ver;
> +  tree base;
> +  poly_int64 bitsize, bitpos;
> +  tree offset;
>    unsigned int phase;
> -  bool store;
> -  HOST_WIDE_INT offset, size;
> +  bool writable;
>    basic_block bb;
>  };
> 
>  /* Hashtable helpers.  */
> 
> -struct ssa_names_hasher : free_ptr_hash <name_to_bb>
> +struct refs_hasher : free_ptr_hash<ref_to_bb>
>  {
> -  static inline hashval_t hash (const name_to_bb *);
> -  static inline bool equal (const name_to_bb *, const name_to_bb *);
> +  static inline hashval_t hash (const ref_to_bb *);
> +  static inline bool equal (const ref_to_bb *, const ref_to_bb *);
>  };
> 
>  /* Used for quick clearing of the hash-table when we see calls.
> @@ -2015,28 +2020,44 @@ static unsigned int nt_call_phase;
>  /* The hash function.  */
> 
>  inline hashval_t
> -ssa_names_hasher::hash (const name_to_bb *n)
> +refs_hasher::hash (const ref_to_bb *n)
>  {
> -  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
> -         ^ (n->offset << 6) ^ (n->size << 3);
> +  inchash::hash hstate;
> +  inchash::add_expr (n->base, hstate);
> +  hstate.add_poly_int (n->bitsize);
> +  hstate.add_poly_int (n->bitpos);
> +  hstate.add_int (n->writable);
> +  if (n->offset != NULL_TREE)
> +    {
> +      inchash::add_expr (n->offset, hstate);
> +    }

extra {}

> +  return hstate.end ();
>  }
> 
>  /* The equality function of *P1 and *P2.  */
> 
>  inline bool
> -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
> +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
>  {
> -  return n1->ssa_name_ver == n2->ssa_name_ver
> -         && n1->store == n2->store
> -         && n1->offset == n2->offset
> -         && n1->size == n2->size;
> +  if (operand_equal_p (n1->base, n2->base, 0)
> +      && known_eq (n1->bitsize, n2->bitsize)
> +      && known_eq (n1->bitpos, n2->bitpos) && n1->writable == n2->writable)
> +    {
> +      /* Should not call operand_equal_p with NULL_TREE.  */
> +      if (n1->offset == NULL_TREE || n2->offset == NULL_TREE)
> +          return n1->offset == n2->offset;

indents are off.  You should be using a tab-stop of 8 characters.

> +      else
> +          return operand_equal_p (n1->offset, n2->offset, 0);
> +    }
> +  return false;
>  }
> 
>  class nontrapping_dom_walker : public dom_walker
>  {
>  public:
>    nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
> -    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
> +    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (256)
> +  {}
> 
>    virtual edge before_dom_children (basic_block);
>    virtual void after_dom_children (basic_block);
> @@ -2053,7 +2074,7 @@ private:
>    hash_set<tree> *m_nontrapping;
> 
>    /* The hash table for remembering what we've seen.  */
> -  hash_table<ssa_names_hasher> m_seen_ssa_names;
> +  hash_table<refs_hasher> m_seen_refs;
>  };
> 
>  /* Called by walk_dominator_tree, when entering the block BB.  */
> @@ -2102,65 +2123,76 @@ nontrapping_dom_walker::after_dom_children 
> (basic_block bb)
>  }
> 
>  /* We see the expression EXP in basic block BB.  If it's an interesting
> -   expression (an MEM_REF through an SSA_NAME) possibly insert the
> -   expression into the set NONTRAP or the hash table of seen expressions.
> -   STORE is true if this expression is on the LHS, otherwise it's on
> -   the RHS.  */
> +   expression of:
> +     1) MEM_REF
> +     2) ARRAY_REF
> +     3) COMPONENT_REF
> +   possibly insert the expression into the set NONTRAP or the hash table
> +   of seen expressions.  STORE is true if this expression is on the LHS,
> +   otherwise it's on the RHS.  */
>  void
>  nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool 
> store)
>  {
> -  HOST_WIDE_INT size;
> -
> -  if (TREE_CODE (exp) == MEM_REF
> -      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
> -      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
> -      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
> +  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
> +      || TREE_CODE (exp) == COMPONENT_REF)
>      {
> -      tree name = TREE_OPERAND (exp, 0);
> -      struct name_to_bb map;
> -      name_to_bb **slot;
> -      struct name_to_bb *n2bb;
> +      struct ref_to_bb map;
> +      ref_to_bb **slot;
> +      struct ref_to_bb *r2bb;
>        basic_block found_bb = 0;
> 
> -      /* Try to find the last seen MEM_REF through the same
> -         SSA_NAME, which can trap.  */
> -      map.ssa_name_ver = SSA_NAME_VERSION (name);
> -      map.phase = 0;
> -      map.bb = 0;
> -      map.store = store;
> -      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
> -      map.size = size;
> -
> -      slot = m_seen_ssa_names.find_slot (&map, INSERT);
> -      n2bb = *slot;
> -      if (n2bb && n2bb->phase >= nt_call_phase)
> -        found_bb = n2bb->bb;
> -
> -      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
> -         (it's in a basic block on the path from us to the dominator root)
> +      /* Try to find the last seen *_REF (through the same base expression
> +         + bitsize + bitpos + offset expression), which can trap.  */
> +      machine_mode nmode;
> +      poly_int64 bitsize, bitpos;
> +      tree offset;
> +      int nunsignedp, nreversep, nvolatilep = 0;
> +      tree base = get_inner_reference (exp, &bitsize, &bitpos, &offset, 
> &nmode,
> +                                        &nunsignedp, &nreversep, 
> &nvolatilep);
> +      gcc_assert (base != NULL_TREE);

So my main concern is that get_inner_reference is quite expensive
since it builds the variable offset part as new GENERIC trees.

I think it would be easier to fully rely on operand_equal_p here
and thus record the original full expression.  For the writable
flag you can use the much cheaper get_base_address (), which gets
you 'base' without the overhead of building the offset tree.

That means you merely exchange ssa_name_ver in the struct with 'exp'.

Thanks,
Richard.

> +      map.base = base;
> +      map.offset = offset;
> +      map.bitsize = bitsize;
> +      map.bitpos = bitpos;
> +
> +      /* The reference is writable if EXP is a:
> +         1) STORE, or
> +         2) LOAD of a local variable without address-taken (as the local 
> stack
> +         is always writable).  This allows cselim on a STORE with a 
> dominating
> +         LOAD.  */
> +      map.writable = store || (auto_var_p (base) && !TREE_ADDRESSABLE 
> (base));
> +
> +      slot = m_seen_refs.find_slot (&map, INSERT);
> +      r2bb = *slot;
> +      if (r2bb && r2bb->phase >= nt_call_phase)
> +        found_bb = r2bb->bb;
> +
> +      /* If we've found a trapping *_REF, _and_ it dominates EXP
> +         (it's in a basic block on the path from us to the dominator root)
>           then we can't trap.  */
> -      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
> +      if (found_bb && (((size_t) found_bb->aux) & 1) == 1)
>          {
>            m_nontrapping->add (exp);
>          }
>        else
> -        {
> +        {
>            /* EXP might trap, so insert it into the hash table.  */
> -          if (n2bb)
> +          if (r2bb)
>              {
> -              n2bb->phase = nt_call_phase;
> -              n2bb->bb = bb;
> +              r2bb->phase = nt_call_phase;
> +              r2bb->bb = bb;
>              }
>            else
>              {
> -              n2bb = XNEW (struct name_to_bb);
> -              n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
> -              n2bb->phase = nt_call_phase;
> -              n2bb->bb = bb;
> -              n2bb->store = store;
> -              n2bb->offset = map.offset;
> -              n2bb->size = size;
> -              *slot = n2bb;
> +              r2bb = XNEW (struct ref_to_bb);
> +              r2bb->phase = nt_call_phase;
> +              r2bb->bb = bb;
> +              r2bb->base = base;
> +              r2bb->offset = map.offset;
> +              r2bb->bitsize = map.bitsize;
> +              r2bb->bitpos = map.bitpos;
> +              r2bb->writable = map.writable;
> +              *slot = r2bb;
>              }
>          }
>      }
> 
> 
> Thanks,
> -Hao
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to