On Tue, May 30, 2023 at 4:21 PM Jan Hubicka <hubi...@ucw.cz> wrote:
>
> > On Mon, May 29, 2023 at 6:20 PM Martin Jambor <mjam...@suse.cz> wrote:
> > >
> > > Hi,
> > >
> > > there have been concerns that linear searches through DECL_ARGUMENTS
> > > that are often necessary to compute the index of a particular
> > > PARM_DECL which is the key to results of IPA-CP can happen often
> > > enough to be a compile time issue, especially if we plug the results
> > > into value numbering, as I intend to do with a follow-up patch.
> > >
> > > This patch creates a hash map to do the look-up for all functions
> > > which have some information discovered by IPA-CP and which have 32
> > > parameters or more.  32 is a hard-wired magical constant here to
> > > capture the trade-off between the memory allocation overhead and
> > > length of the linear search.  I do not think it is worth making it a
> > > --param but if people think it appropriate, I can turn it into one.
> >
> > Since ipcp_transformation is short-lived (is it?) is it worth the trouble?
> > Comments below ...
>
> It lives from ipa-cp time to WPA stream-out or IPA transform stage,
> so memory consumption is a concern with -flto.
> > > +  m_tree_to_idx = hash_map<const_tree, unsigned>::create_ggc (c);
> > > +  unsigned index = 0;
> > > +  for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
> > > +    m_tree_to_idx->put (p, index);
> >
> > I think allocating the hash-map with 'c' for some numbers (depending
> > on the "prime"
> > chosen) will necessarily cause re-allocation of the hash since we keep a 
> > load
> > factor of at most 3/4 upon insertion.
> >
> > But - I wonder if a UID sorted array isn't a very much better data
> > structure for this?
> > That is, a vec<std::pair<int, unsigned> >?
>
> Yeah, I was thinking along this lines too.
> Having field directly in PARM_DECL node would be probably prettiest.
> In general this is probably not that important as wast amount of time we
> have few parameters and linear lookup is just fine.

There is 6 bits of DECL_OFFSET_ALIGN that could be re-purposed, but
64 parameters is a bit low.  _Maybe_ PARM_DECL doesn't need any of
the tree_base bits so could use the full word for sth else as well ...

I also though it might be interesting to only record PARM_DECLs that
we have interesting info for and skip VARYING ones.  So with an
indirection DECL_OFFSET_ALIGN -> index to non-varying param or
-1 the encoding space could shrink.

But still using a vec<> looks like a straight-forward improvement here.

Richard.

> Honza
> >
> > > +}
> > > diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
> > > index ab6de9f10da..f0976e363f7 100644
> > > --- a/gcc/ipa-prop.cc
> > > +++ b/gcc/ipa-prop.cc
> > > @@ -5776,16 +5776,9 @@ ipcp_get_parm_bits (tree parm, tree *value, 
> > > widest_int *mask)
> > >    if (!ts || vec_safe_length (ts->bits) == 0)
> > >      return false;
> > >
> > > -  int i = 0;
> > > -  for (tree p = DECL_ARGUMENTS (current_function_decl);
> > > -       p != parm; p = DECL_CHAIN (p))
> > > -    {
> > > -      i++;
> > > -      /* Ignore static chain.  */
> > > -      if (!p)
> > > -       return false;
> > > -    }
> > > -
> > > +  int i = ts->get_param_index (current_function_decl, parm);
> > > +  if (i < 0)
> > > +    return false;
> > >    clone_info *cinfo = clone_info::get (cnode);
> > >    if (cinfo && cinfo->param_adjustments)
> > >      {
> > > @@ -5802,16 +5795,12 @@ ipcp_get_parm_bits (tree parm, tree *value, 
> > > widest_int *mask)
> > >    return true;
> > >  }
> > >
> > > -
> > > -/* Update bits info of formal parameters as described in
> > > -   ipcp_transformation.  */
> > > +/* Update bits info of formal parameters of NODE as described in TS.  */
> > >
> > >  static void
> > > -ipcp_update_bits (struct cgraph_node *node)
> > > +ipcp_update_bits (struct cgraph_node *node, ipcp_transformation *ts)
> > >  {
> > > -  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
> > > -
> > > -  if (!ts || vec_safe_length (ts->bits) == 0)
> > > +  if (vec_safe_is_empty (ts->bits))
> > >      return;
> > >    vec<ipa_bits *, va_gc> &bits = *ts->bits;
> > >    unsigned count = bits.length ();
> > > @@ -5913,14 +5902,12 @@ ipcp_update_bits (struct cgraph_node *node)
> > >      }
> > >  }
> > >
> > > -/* Update value range of formal parameters as described in
> > > -   ipcp_transformation.  */
> > > +/* Update value range of formal parameters of NODE as described in TS.  
> > > */
> > >
> > >  static void
> > > -ipcp_update_vr (struct cgraph_node *node)
> > > +ipcp_update_vr (struct cgraph_node *node, ipcp_transformation *ts)
> > >  {
> > > -  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
> > > -  if (!ts || vec_safe_length (ts->m_vr) == 0)
> > > +  if (vec_safe_is_empty (ts->m_vr))
> > >      return;
> > >    const vec<ipa_vr, va_gc> &vr = *ts->m_vr;
> > >    unsigned count = vr.length ();
> > > @@ -5996,10 +5983,17 @@ ipcp_transform_function (struct cgraph_node *node)
> > >      fprintf (dump_file, "Modification phase of node %s\n",
> > >              node->dump_name ());
> > >
> > > -  ipcp_update_bits (node);
> > > -  ipcp_update_vr (node);
> > >    ipcp_transformation *ts = ipcp_get_transformation_summary (node);
> > > -  if (!ts || vec_safe_is_empty (ts->m_agg_values))
> > > +  if (!ts
> > > +      || (vec_safe_is_empty (ts->m_agg_values)
> > > +         && vec_safe_is_empty (ts->bits)
> > > +         && vec_safe_is_empty (ts->m_vr)))
> > > +    return 0;
> > > +
> > > +  ts->maybe_create_parm_idx_map (cfun->decl);
> > > +  ipcp_update_bits (node, ts);
> > > +  ipcp_update_vr (node, ts);
> > > +  if (vec_safe_is_empty (ts->m_agg_values))
> > >        return 0;
> > >    param_count = count_formal_params (node->decl);
> > >    if (param_count == 0)
> > > diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h
> > > index f306f8a377e..211b12ff6b3 100644
> > > --- a/gcc/ipa-prop.h
> > > +++ b/gcc/ipa-prop.h
> > > @@ -925,16 +925,10 @@ ipa_is_param_used_by_polymorphic_call (class 
> > > ipa_node_params *info, int i)
> > >
> > >  struct GTY(()) ipcp_transformation
> > >  {
> > > -  /* Known aggregate values.  */
> > > -  vec<ipa_argagg_value, va_gc>  *m_agg_values;
> > > -  /* Known bits information.  */
> > > -  vec<ipa_bits *, va_gc> *bits;
> > > -  /* Value range information.  */
> > > -  vec<ipa_vr, va_gc> *m_vr;
> > > -
> > >    /* Default constructor.  */
> > >    ipcp_transformation ()
> > > -  : m_agg_values (NULL), bits (NULL), m_vr (NULL)
> > > +    : m_agg_values (nullptr), bits (nullptr), m_vr (nullptr),
> > > +    m_tree_to_idx (nullptr)
> > >    { }
> > >
> > >    /* Default destructor.  */
> > > @@ -944,6 +938,29 @@ struct GTY(()) ipcp_transformation
> > >      vec_free (bits);
> > >      vec_free (m_vr);
> > >    }
> > > +
> > > +  /* Given PARAM which must be a parameter of function FNDECL described 
> > > by
> > > +     THIS, return its index in the DECL_ARGUMENTS chain, using a 
> > > pre-computed
> > > +     hash map if avialable (which is pre-computed only if there are many
> > > +     parameters).  Can return -1 if param is static chain not 
> > > represented among
> > > +     DECL_ARGUMENTS. */
> > > +  int get_param_index (const_tree fndecl, const_tree param) const;
> > > +
> > > +  /* Assuming THIS describes FUNC and it has sufficiently many 
> > > parameters to
> > > +     justify the overhead, creat a has map from parameter trees to their
> > > +     indices.  */
> > > +
> > > +  void maybe_create_parm_idx_map (tree fndecl);
> > > +
> > > +  /* Known aggregate values.  */
> > > +  vec<ipa_argagg_value, va_gc>  *m_agg_values;
> > > +  /* Known bits information.  */
> > > +  vec<ipa_bits *, va_gc> *bits;
> > > +  /* Value range information.  */
> > > +  vec<ipa_vr, va_gc> *m_vr;
> > > +  /* If there are many parameters, a hash map to speed-up look-ups of 
> > > their
> > > +     indices.  */
> > > +  hash_map<const_tree, unsigned> *m_tree_to_idx;
> > >  };
> > >
> > >  inline
> > > --
> > > 2.40.1
> > >

Reply via email to