Hi Juzhe,

> +max_number_of_live_regs (const basic_block bb,
> +                      const hash_map<tree, pair> &live_ranges,
> +                      unsigned int max_point, machine_mode biggest_mode,
> +                      int lmul)
> +{
> +  unsigned int max_nregs = 0;
> +  unsigned int i;
> +  unsigned int live_point = 0;
> +  auto_vec<unsigned int> live_vars_vec;
> +  live_vars_vec.safe_grow (max_point + 1, true);
> +  for (i = 0; i < live_vars_vec.length (); ++i)
> +    live_vars_vec[i] = 0;
> +  for (hash_map<tree, pair>::iterator iter = live_ranges.begin ();
> +       iter != live_ranges.end (); ++iter)
> +    {
> +      tree var = (*iter).first;
> +      pair live_range = (*iter).second;
> +      for (i = live_range.first; i <= live_range.second; i++)
> +     {
> +       machine_mode mode = TYPE_MODE (TREE_TYPE (var));
> +       unsigned int nregs
> +         = compute_nregs_for_mode (mode, biggest_mode, lmul);
> +       live_vars_vec[i] += nregs;
> +       if (live_vars_vec[i] > max_nregs)
> +         max_nregs = live_vars_vec[i];
> +     }
> +    }

My concern is that we have O(nm) here, where n = number of live_ranges
and m = size of live range.  In large basic blocks (think calculix of
SPECfp 2006 which can reach up to 2000 instructions IIRC) this might
become prohibitive.

I'm going to do a quick benchmark with calculix and report back.  If
there is no noticable difference we can ditch my idea.

For short live ranges (like < 10) the O(nm) could be better.  As of now,
we still calculate the nregs n*m times, though.  I have something like
the following in mind (it is definitely not shorter, though):

  struct range {
      unsigned int pt;
      bool start;
      unsigned int nregs;
  };

  auto_vec<range> ranges (2 * live_ranges.elements ());
  for (hash_map<tree, pair>::iterator iter = live_ranges.begin ();
       iter != live_ranges.end (); ++iter)
    {
      tree var = (*iter).first;
      machine_mode mode = TYPE_MODE (TREE_TYPE (var));
      unsigned int nregs
          = compute_nregs_for_mode (mode, biggest_mode, lmul);
      ranges.quick_push ({(*iter).second.first, true, nregs});
      ranges.quick_push ({(*iter).second.second, false, nregs});
    }

  ranges.qsort ([] (const void *a, const void *b) -> int {
                unsigned int aa = ((const range *)a)->pt;
                unsigned int bb = ((const range *)b)->pt;
                if (aa < bb)
                  return -1;
                if (aa == bb)
                  return 0;
                return 1;
                });

  unsigned int cur = 0;
  max_nregs = ranges[0].nregs;

  for (auto r : ranges)
    {
      if (r.start)
        cur += r.nregs;
      else
        cur -= r.nregs;
      max_nregs = MAX (max_nregs, cur);
    }

> +  for (i = 0; i < cfun->gimple_df->ssa_names->length (); i++)
> +    {
> +      tree t = ssa_name (i);
> +      if (!t)
> +       continue;

Could likely be replaced by

  tree t;
  FOR_EACH_SSA_NAME (i, t, cfun)

> +static void
> +update_local_live_ranges (
> +  vec_info *vinfo,
> +  hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
> +  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
> +{

I just realized (sorry) that this is "nested" a bit far.  Can we still
have e.g. 

> +  if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
> +    {

this,

> +           if (STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
> +               != undef_vec_info_type)

this,

> +                   if (live_range)
> +                     {

and this just "continue"?

Apart from that, LGTM.

Regards
 Robin

Reply via email to