This speeds up IVOPTs by optimizing its hottest function when compiling
polyhedron linpk.  The datastructure used for recording use, candidate
costs (a hashtable) should make O(1) queries on average - but it turns
out that for use, candidate queries that have no entry recorded in
it it is O(n) currently.  That's because the collision handling does
not abort when coming across an empty hashtable entry (oops).

Overall compile-time effect is noise (linpk compiles in less than
a second for me ...), but callgrind tells me it results in a 3%
compile-time improvement.

There is still the degenerate cases when the hashtable is (almost)
full (doesn't happen for this testcase), but that's an unrelated issue.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Jakub, does this look ok for trunk now or shall I wait for stage1?

Thanks,
Richard.

2013-02-19  Richard Biener  <rguent...@suse.de>

        * tree-ssa-loop-ivopts.c (alloc_use_cost_map): Use bitmap_count_bits
        and ceil_log2.
        (get_use_iv_cost): Terminate hashtable walk when coming across
        an empty entry.

Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
*** gcc/tree-ssa-loop-ivopts.c  (revision 196135)
--- gcc/tree-ssa-loop-ivopts.c  (working copy)
*************** record_important_candidates (struct ivop
*** 2574,2599 ****
  static void
  alloc_use_cost_map (struct ivopts_data *data)
  {
!   unsigned i, size, s, j;
  
    for (i = 0; i < n_iv_uses (data); i++)
      {
        struct iv_use *use = iv_use (data, i);
-       bitmap_iterator bi;
  
        if (data->consider_all_candidates)
        size = n_iv_cands (data);
        else
        {
!         s = 0;
!         EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
!           {
!             s++;
!           }
  
          /* Round up to the power of two, so that moduling by it is fast.  */
!         for (size = 1; size < s; size <<= 1)
!           continue;
        }
  
        use->n_map_members = size;
--- 2574,2593 ----
  static void
  alloc_use_cost_map (struct ivopts_data *data)
  {
!   unsigned i, size, s;
  
    for (i = 0; i < n_iv_uses (data); i++)
      {
        struct iv_use *use = iv_use (data, i);
  
        if (data->consider_all_candidates)
        size = n_iv_cands (data);
        else
        {
!         s = bitmap_count_bits (use->related_cands);
  
          /* Round up to the power of two, so that moduling by it is fast.  */
!         size = 1 << ceil_log2 (s + 1);
        }
  
        use->n_map_members = size;
*************** get_use_iv_cost (struct ivopts_data *dat
*** 2731,2740 ****
    for (i = s; i < use->n_map_members; i++)
      if (use->cost_map[i].cand == cand)
        return use->cost_map + i;
! 
    for (i = 0; i < s; i++)
      if (use->cost_map[i].cand == cand)
        return use->cost_map + i;
  
    return NULL;
  }
--- 2725,2737 ----
    for (i = s; i < use->n_map_members; i++)
      if (use->cost_map[i].cand == cand)
        return use->cost_map + i;
!     else if (use->cost_map[i].cand == NULL)
!       return NULL;
    for (i = 0; i < s; i++)
      if (use->cost_map[i].cand == cand)
        return use->cost_map + i;
+     else if (use->cost_map[i].cand == NULL)
+       return NULL;
  
    return NULL;
  }

Reply via email to