Hi,

On Thu, Aug 20 2020, Richard Sandiford wrote:
>>
>>
>> Really appreciate for your detailed explanation.  BTW, My previous patch
>> for PGO build on exchange2 takes this similar method by setting each cloned
>> node to 1/10th of the frequency several month agao :)
>>
>> https://gcc.gnu.org/pipermail/gcc-patches/2020-June/546926.html
>
> Does it seem likely that we'll reach a resolution on this soon?
> I take the point that the patch that introduced the exchange regression
> [https://gcc.gnu.org/pipermail/gcc-patches/2020-August/551757.html]
> was just uncovering a latent issue, but still, this is a large regression
> in an important benchmark to be carrying around.  For those of us doing
> regular benchmark CI, the longer the performance trough gets, the harder
> it is to spot other unrelated regressions in the “properly optimised” code.
>
> So if we don't have a specific plan for fixing the regression soon,
> I think we should consider reverting the patch until we have something
> that avoids the exchange regression, even though the patch wasn't
> technically wrong.

Honza's changes have been motivated to big extent as an enabler for
IPA-CP heuristics changes to actually speed up 548.exchange2_r.

On my AMD Zen2 machine, the run-time of exchange2 was 358 seconds two
weeks ago, this week it is 403, but with my WIP (and so far untested)
patch below it is just 276 seconds - faster than one built with GCC 8
which needs 283 seconds.

I'll be interested in knowing if it also works this well on other
architectures.

The patch still needs a bit of a cleanup.  The change of the default
value of ipa-cp-unit-growth needs to be done only for small compilation
units (like inlining does).  I should experiment if the value of
param_ipa_cp_loop_hint_bonus should be changed or not.  And last but not
least, I also want to clean-up the interfaces between ipa-fnsummary.c
and ipa-cp.c a bit.  I am working on all of this and hope to finish the
patch set in a few (working) days.

The bottom line is that there is a plan to address this regression.

Martin



diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index e4910a04ffa..0d44310503a 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -3190,11 +3190,23 @@ devirtualization_time_bonus (struct cgraph_node *node,
 /* Return time bonus incurred because of HINTS.  */
 
 static int
-hint_time_bonus (cgraph_node *node, ipa_hints hints)
+hint_time_bonus (cgraph_node *node, ipa_hints hints, sreal known_iter_freq,
+                sreal known_strides_freq)
 {
   int result = 0;
-  if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
-    result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
+  sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
+
+  if (hints & INLINE_HINT_loop_iterations)
+    {
+      /* FIXME: This should probably be way more nuanced.  */
+      result += (known_iter_freq * bonus_for_one).to_int ();
+    }
+  if (hints & INLINE_HINT_loop_stride)
+    {
+      /* FIXME: And this as well.  */
+      result += (known_strides_freq * bonus_for_one).to_int ();
+    }
+
   return result;
 }
 
@@ -3395,12 +3407,13 @@ perform_estimation_of_a_value (cgraph_node *node, 
vec<tree> known_csts,
                               int est_move_cost, ipcp_value_base *val)
 {
   int size, time_benefit;
-  sreal time, base_time;
+  sreal time, base_time, known_iter_freq, known_strides_freq;
   ipa_hints hints;
 
   estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts,
                                     known_aggs, &size, &time,
-                                    &base_time, &hints);
+                                    &base_time, &hints, &known_iter_freq,
+                                    &known_strides_freq);
   base_time -= time;
   if (base_time > 65535)
     base_time = 65535;
@@ -3414,7 +3427,7 @@ perform_estimation_of_a_value (cgraph_node *node, 
vec<tree> known_csts,
     time_benefit = base_time.to_int ()
       + devirtualization_time_bonus (node, known_csts, known_contexts,
                                     known_aggs)
-      + hint_time_bonus (node, hints)
+      + hint_time_bonus (node, hints, known_iter_freq, known_strides_freq)
       + removable_params_cost + est_move_cost;
 
   gcc_checking_assert (size >=0);
@@ -3476,7 +3489,7 @@ estimate_local_effects (struct cgraph_node *node)
     {
       struct caller_statistics stats;
       ipa_hints hints;
-      sreal time, base_time;
+      sreal time, base_time, known_iter_freq, known_strides_freq;
       int size;
 
       init_caller_stats (&stats);
@@ -3484,9 +3497,11 @@ estimate_local_effects (struct cgraph_node *node)
                                              false);
       estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts,
                                         known_aggs, &size, &time,
-                                        &base_time, &hints);
+                                        &base_time, &hints, &known_iter_freq,
+                                        &known_strides_freq);
       time -= devirt_bonus;
-      time -= hint_time_bonus (node, hints);
+      time -= hint_time_bonus (node, hints, known_iter_freq,
+                              known_strides_freq);
       time -= removable_params_cost;
       size -= stats.n_calls * removable_params_cost;
 
diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 2cfab40156e..29fac5db19f 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -310,6 +310,35 @@ set_hint_predicate (predicate **p, predicate new_predicate)
     }
 }
 
+/* Find if NEW_PREDICATE is already in V and if so, increment its freq.
+   Otherwise add a new item to the vector with this predicate and frerq equal
+   to add_freq.  */
+
+static void
+add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
+                           const predicate &new_predicate, sreal add_freq)
+{
+  if (new_predicate == false || new_predicate == true)
+    return;
+  ipa_freqcounting_predicate *f;
+  for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
+    if (new_predicate == f->predicate)
+      {
+       f->freq += add_freq;
+       return;
+      }
+  /* FIXME: Make this a parameter.  */
+  if (vec_safe_length (*v) >= 32)
+    /* Too many different predicates to account for.  */
+    return;
+
+  ipa_freqcounting_predicate fcp;
+  fcp.predicate = NULL;
+  set_hint_predicate (&fcp.predicate, new_predicate);
+  fcp.freq = add_freq;
+  vec_safe_push (*v, fcp);
+  return;
+}
 
 /* Compute what conditions may or may not hold given information about
    parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
@@ -722,10 +751,12 @@ ipa_call_summary::~ipa_call_summary ()
 
 ipa_fn_summary::~ipa_fn_summary ()
 {
-  if (loop_iterations)
-    edge_predicate_pool.remove (loop_iterations);
-  if (loop_stride)
-    edge_predicate_pool.remove (loop_stride);
+  unsigned len = vec_safe_length (loop_iterations);
+  for (unsigned i = 0; i < len; i++)
+    edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
+  len = vec_safe_length (loop_strides);
+  for (unsigned i = 0; i < len; i++)
+    edge_predicate_pool.remove ((*loop_strides)[i].predicate);
   vec_free (conds);
   vec_free (size_time_table);
   vec_free (call_size_time_table);
@@ -741,24 +772,33 @@ ipa_fn_summary_t::remove_callees (cgraph_node *node)
     ipa_call_summaries->remove (e);
 }
 
-/* Same as remap_predicate_after_duplication but handle hint predicate *P.
-   Additionally care about allocating new memory slot for updated predicate
-   and set it to NULL when it becomes true or false (and thus uninteresting).
- */
+/* Duplicate predicates in loop hint vector, allocating memory for them and
+   remove and deallocate any uninteresting (true or false) ones.  Return the
+   result.  */
 
-static void
-remap_hint_predicate_after_duplication (predicate **p,
-                                       clause_t possible_truths)
+static vec<ipa_freqcounting_predicate, va_gc> *
+remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
+                                   clause_t possible_truths)
 {
-  predicate new_predicate;
+  if (vec_safe_length (v) == 0)
+    return NULL;
 
-  if (!*p)
-    return;
+  vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
+  int len = res->length();
+  for (int i = len - 1; i >= 0; i--)
+    {
+      predicate new_predicate
+       = (*res)[i].predicate->remap_after_duplication (possible_truths);
+      /* We do not want to free previous predicate; it is used by node
+        origin.  */
+      (*res)[i].predicate = NULL;
+      set_hint_predicate (&(*res)[i].predicate, new_predicate);
+
+      if (!(*res)[i].predicate)
+       res->unordered_remove (i);
+    }
 
-  new_predicate = (*p)->remap_after_duplication (possible_truths);
-  /* We do not want to free previous predicate; it is used by node origin.  */
-  *p = NULL;
-  set_hint_predicate (p, new_predicate);
+  return res;
 }
 
 
@@ -874,9 +914,11 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
            optimized_out_size += es->call_stmt_size * 
ipa_fn_summary::size_scale;
          edge_set_predicate (edge, &new_predicate);
        }
-      remap_hint_predicate_after_duplication (&info->loop_iterations,
+      info->loop_iterations
+       = remap_freqcounting_preds_after_dup (info->loop_iterations,
                                              possible_truths);
-      remap_hint_predicate_after_duplication (&info->loop_stride,
+      info->loop_strides
+       = remap_freqcounting_preds_after_dup (info->loop_strides,
                                              possible_truths);
 
       /* If inliner or someone after inliner will ever start producing
@@ -888,17 +930,21 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
   else
     {
       info->size_time_table = vec_safe_copy (info->size_time_table);
-      if (info->loop_iterations)
+      info->loop_iterations = vec_safe_copy (info->loop_iterations);
+      info->loop_strides = vec_safe_copy (info->loop_strides);
+
+      ipa_freqcounting_predicate *f;
+      for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
        {
-         predicate p = *info->loop_iterations;
-         info->loop_iterations = NULL;
-         set_hint_predicate (&info->loop_iterations, p);
+         predicate p = *f->predicate;
+         f->predicate = NULL;
+         set_hint_predicate (&f->predicate, p);
        }
-      if (info->loop_stride)
+      for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
        {
-         predicate p = *info->loop_stride;
-         info->loop_stride = NULL;
-         set_hint_predicate (&info->loop_stride, p);
+         predicate p = *f->predicate;
+         f->predicate = NULL;
+         set_hint_predicate (&f->predicate, p);
        }
     }
   if (!dst->inlined_to)
@@ -1057,15 +1103,28 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
                }
              fprintf (f, "\n");
            }
-         if (s->loop_iterations)
+         ipa_freqcounting_predicate *fcp;
+         bool first_fcp = true;
+         for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
            {
-             fprintf (f, "  loop iterations:");
-             s->loop_iterations->dump (f, s->conds);
+             if (first_fcp)
+               {
+                 fprintf (f, "  loop iterations:");
+                 first_fcp = false;
+               }
+             fprintf (f, "  %3.2f for ", fcp->freq.to_double ());
+             fcp->predicate->dump (f, s->conds);
            }
-         if (s->loop_stride)
+         first_fcp = true;
+         for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
            {
-             fprintf (f, "  loop stride:");
-             s->loop_stride->dump (f, s->conds);
+             if (first_fcp)
+               {
+                 fprintf (f, "  loop strides:");
+                 first_fcp = false;
+               }
+             fprintf (f, "  %3.2f for :", fcp->freq.to_double ());
+             fcp->predicate->dump (f, s->conds);
            }
          fprintf (f, "  calls:\n");
          dump_ipa_call_summary (f, 4, node, s);
@@ -2514,12 +2573,13 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
 
   if (fbi.info)
     compute_bb_predicates (&fbi, node, info, params_summary);
+  const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
   order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
   nblocks = pre_and_rev_post_order_compute (NULL, order, false);
   for (n = 0; n < nblocks; n++)
     {
       bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
-      freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
+      freq = bb->count.to_sreal_scale (entry_count);
       if (clobber_only_eh_bb_p (bb))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2758,23 +2818,27 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
 
   if (nonconstant_names.exists () && !early)
     {
+      ipa_fn_summary *s = ipa_fn_summaries->get (node);
       class loop *loop;
-      predicate loop_iterations = true;
-      predicate loop_stride = true;
 
       if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loops_dump (dump_file, NULL, 0);
       scev_initialize ();
       FOR_EACH_LOOP (loop, 0)
        {
+         predicate loop_iterations = true;
+         sreal header_freq;
          vec<edge> exits;
          edge ex;
          unsigned int j;
          class tree_niter_desc niter_desc;
          if (loop->header->aux)
-           bb_predicate = *(predicate *) loop->header->aux;
+           {
+             bb_predicate = *(predicate *) loop->header->aux;
+             header_freq = loop->header->count.to_sreal_scale (entry_count);
+           }
          else
-           bb_predicate = false;
+           continue;
 
          exits = get_loop_exit_edges (loop);
          FOR_EACH_VEC_ELT (exits, j, ex)
@@ -2790,10 +2854,10 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
                will_be_nonconstant = bb_predicate & will_be_nonconstant;
              if (will_be_nonconstant != true
                  && will_be_nonconstant != false)
-               /* This is slightly inprecise.  We may want to represent each
-                  loop with independent predicate.  */
                loop_iterations &= will_be_nonconstant;
            }
+         add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
+                                     header_freq);
          exits.release ();
        }
 
@@ -2803,14 +2867,20 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
       for (loop = loops_for_fn (cfun)->tree_root->inner;
           loop != NULL; loop = loop->next)
        {
+         predicate loop_stride = true;
+         sreal bb_freq;
          basic_block *body = get_loop_body (loop);
          for (unsigned i = 0; i < loop->num_nodes; i++)
            {
              gimple_stmt_iterator gsi;
              if (body[i]->aux)
-               bb_predicate = *(predicate *) body[i]->aux;
+               {
+                 bb_predicate = *(predicate *) body[i]->aux;
+                 bb_freq = body[i]->count.to_sreal_scale (entry_count);
+               }
              else
-               bb_predicate = false;
+               continue;
+
              for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
                   gsi_next (&gsi))
                {
@@ -2839,16 +2909,13 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
                    will_be_nonconstant = bb_predicate & will_be_nonconstant;
                  if (will_be_nonconstant != true
                      && will_be_nonconstant != false)
-                   /* This is slightly inprecise.  We may want to represent
-                      each loop with independent predicate.  */
                    loop_stride = loop_stride & will_be_nonconstant;
                }
            }
+         add_freqcounting_predicate (&s->loop_strides, loop_stride,
+                                     bb_freq);
          free (body);
        }
-      ipa_fn_summary *s = ipa_fn_summaries->get (node);
-      set_hint_predicate (&s->loop_iterations, loop_iterations);
-      set_hint_predicate (&s->loop_stride, loop_stride);
       scev_finalize ();
     }
   FOR_ALL_BB_FN (bb, my_function)
@@ -3640,14 +3707,32 @@ ipa_call_context::estimate_size_and_time (int *ret_size,
   if (time > nonspecialized_time)
     time = nonspecialized_time;
 
+  m_loops_with_known_iterations = 0;
+  ipa_freqcounting_predicate *fcp;
+  for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
+    {
+      gcc_assert (fcp->predicate);
+      if (!fcp->predicate->evaluate (m_possible_truths))
+       {
+         if (ret_hints)
+           hints |= INLINE_HINT_loop_iterations;
+         m_loops_with_known_iterations += fcp->freq;
+       }
+    }
+  m_loops_with_known_strides = 0;
+  for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
+    {
+      gcc_assert (fcp->predicate);
+      if (!fcp->predicate->evaluate (m_possible_truths))
+       {
+         if (ret_hints)
+           hints |= INLINE_HINT_loop_stride;
+         m_loops_with_known_strides += fcp->freq;
+       }
+    }
+
   if (ret_hints)
     {
-      if (info->loop_iterations
-         && !info->loop_iterations->evaluate (m_possible_truths))
-       hints |= INLINE_HINT_loop_iterations;
-      if (info->loop_stride
-         && !info->loop_stride->evaluate (m_possible_truths))
-       hints |= INLINE_HINT_loop_stride;
       if (info->scc_no)
        hints |= INLINE_HINT_in_scc;
       if (DECL_DECLARED_INLINE_P (m_node->decl))
@@ -3687,7 +3772,9 @@ estimate_ipcp_clone_size_and_time (struct cgraph_node 
*node,
                                   vec<ipa_agg_value_set> known_aggs,
                                   int *ret_size, sreal *ret_time,
                                   sreal *ret_nonspec_time,
-                                  ipa_hints *hints)
+                                  ipa_hints *hints,
+                                  sreal *loops_with_known_iterations,
+                                  sreal *loops_with_known_strides)
 {
   clause_t clause, nonspec_clause;
 
@@ -3699,6 +3786,8 @@ estimate_ipcp_clone_size_and_time (struct cgraph_node 
*node,
                        known_aggs, vNULL);
   ctx.estimate_size_and_time (ret_size, NULL, ret_time,
                              ret_nonspec_time, hints);
+  *loops_with_known_iterations = ctx.m_loops_with_known_iterations;
+  *loops_with_known_strides = ctx.m_loops_with_known_strides;
 }
 
 /* Return stack frame offset where frame of NODE is supposed to start inside
@@ -3857,32 +3946,31 @@ remap_edge_summaries (struct cgraph_edge *inlined_edge,
     }
 }
 
-/* Same as remap_predicate, but set result into hint *HINT.  */
+/* Run remap_after_inlining on each predicate in V.  */
 
 static void
-remap_hint_predicate (class ipa_fn_summary *info,
-                     class ipa_node_params *params_summary,
-                     class ipa_fn_summary *callee_info,
-                     predicate **hint,
-                     vec<int> operand_map,
-                     vec<int> offset_map,
-                     clause_t possible_truths,
-                     predicate *toplev_predicate)
-{
-  predicate p;
+remap_freqcounting_predicate (class ipa_fn_summary *info,
+                             class ipa_node_params *params_summary,
+                             class ipa_fn_summary *callee_info,
+                             vec<ipa_freqcounting_predicate, va_gc> *v,
+                             vec<int> operand_map,
+                             vec<int> offset_map,
+                             clause_t possible_truths,
+                             predicate *toplev_predicate)
 
-  if (!*hint)
-    return;
-  p = (*hint)->remap_after_inlining
-                        (info, params_summary, callee_info,
-                         operand_map, offset_map,
-                         possible_truths, *toplev_predicate);
-  if (p != false && p != true)
+{
+  ipa_freqcounting_predicate *fcp;
+  for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
     {
-      if (!*hint)
-       set_hint_predicate (hint, p);
-      else
-       **hint &= p;
+      predicate p
+       = fcp->predicate->remap_after_inlining (info, params_summary,
+                                               callee_info, operand_map,
+                                               offset_map, possible_truths,
+                                               *toplev_predicate);
+      if (p != false && p != true)
+       /* FIXME: Is this really supposed to be &= and not a plain
+          assignment?  */
+       *fcp->predicate &= p;
     }
 }
 
@@ -3992,12 +4080,12 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge 
*edge)
   remap_edge_summaries (edge, edge->callee, info, params_summary,
                        callee_info, operand_map,
                        offset_map, clause, &toplev_predicate);
-  remap_hint_predicate (info, params_summary, callee_info,
-                       &callee_info->loop_iterations,
-                       operand_map, offset_map, clause, &toplev_predicate);
-  remap_hint_predicate (info, params_summary, callee_info,
-                       &callee_info->loop_stride,
-                       operand_map, offset_map, clause, &toplev_predicate);
+  remap_freqcounting_predicate (info, params_summary, callee_info,
+                               info->loop_iterations, operand_map,
+                               offset_map, clause, &toplev_predicate);
+  remap_freqcounting_predicate (info, params_summary, callee_info,
+                               info->loop_strides, operand_map,
+                               offset_map, clause, &toplev_predicate);
 
   HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
   HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
@@ -4322,12 +4410,34 @@ inline_read_section (struct lto_file_decl_data 
*file_data, const char *data,
            info->size_time_table->quick_push (e);
        }
 
-      p.stream_in (&ib);
-      if (info)
-        set_hint_predicate (&info->loop_iterations, p);
-      p.stream_in (&ib);
-      if (info)
-        set_hint_predicate (&info->loop_stride, p);
+      count2 = streamer_read_uhwi (&ib);
+      for (j = 0; j < count2; j++)
+       {
+         p.stream_in (&ib);
+         sreal fcp_freq = sreal::stream_in (&ib);
+         if (info)
+           {
+             ipa_freqcounting_predicate fcp;
+             fcp.predicate = NULL;
+             set_hint_predicate (&fcp.predicate, p);
+             fcp.freq = fcp_freq;
+             vec_safe_push (info->loop_iterations, fcp);
+           }
+       }
+      count2 = streamer_read_uhwi (&ib);
+      for (j = 0; j < count2; j++)
+       {
+         p.stream_in (&ib);
+         sreal fcp_freq = sreal::stream_in (&ib);
+         if (info)
+           {
+             ipa_freqcounting_predicate fcp;
+             fcp.predicate = NULL;
+             set_hint_predicate (&fcp.predicate, p);
+             fcp.freq = fcp_freq;
+             vec_safe_push (info->loop_strides, fcp);
+           }
+       }
       for (e = node->callees; e; e = e->next_callee)
        read_ipa_call_summary (&ib, e, info != NULL);
       for (e = node->indirect_calls; e; e = e->next_callee)
@@ -4487,14 +4597,19 @@ ipa_fn_summary_write (void)
              e->exec_predicate.stream_out (ob);
              e->nonconst_predicate.stream_out (ob);
            }
-         if (info->loop_iterations)
-           info->loop_iterations->stream_out (ob);
-         else
-           streamer_write_uhwi (ob, 0);
-         if (info->loop_stride)
-           info->loop_stride->stream_out (ob);
-         else
-           streamer_write_uhwi (ob, 0);
+         ipa_freqcounting_predicate *fcp;
+         streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
+         for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
+           {
+             fcp->predicate->stream_out (ob);
+             fcp->freq.stream_out (ob);
+           }
+         streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
+         for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
+           {
+             fcp->predicate->stream_out (ob);
+             fcp->freq.stream_out (ob);
+           }
          for (edge = cnode->callees; edge; edge = edge->next_callee)
            write_ipa_call_summary (ob, edge);
          for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
index c6ddc9f3199..d8429afdbef 100644
--- a/gcc/ipa-fnsummary.h
+++ b/gcc/ipa-fnsummary.h
@@ -101,6 +101,19 @@ public:
   }
 };
 
+/* Structure to capture how frequently some interesting events occur given a
+   particular predicate.  The structure is used to estimate how often we
+   encounter loops with known iteration count or stride in various
+   contexts.  */
+
+struct GTY(()) ipa_freqcounting_predicate
+{
+  /* The described event happens with this frequency... */
+  sreal freq;
+  /* ...when this predicate evaluates to false. */
+  class predicate * GTY((skip)) predicate;
+};
+
 /* Function inlining information.  */
 class GTY(()) ipa_fn_summary
 {
@@ -112,8 +125,9 @@ public:
       inlinable (false), single_caller (false),
       fp_expressions (false), estimated_stack_size (false),
       time (0), conds (NULL),
-      size_time_table (NULL), call_size_time_table (NULL), loop_iterations 
(NULL),
-      loop_stride (NULL), growth (0), scc_no (0)
+      size_time_table (NULL), call_size_time_table (NULL),
+      loop_iterations (NULL), loop_strides (NULL),
+      growth (0), scc_no (0)
   {
   }
 
@@ -125,13 +139,12 @@ public:
     estimated_stack_size (s.estimated_stack_size),
     time (s.time), conds (s.conds), size_time_table (s.size_time_table),
     call_size_time_table (NULL),
-    loop_iterations (s.loop_iterations), loop_stride (s.loop_stride),
+    loop_iterations (s.loop_iterations), loop_strides (s.loop_strides),
     growth (s.growth), scc_no (s.scc_no)
   {}
 
   /* Default constructor.  */
   ~ipa_fn_summary ();
-
   /* Information about the function body itself.  */
 
   /* Minimal size increase after inlining.  */
@@ -164,12 +177,10 @@ public:
   vec<size_time_entry, va_gc> *size_time_table;
   vec<size_time_entry, va_gc> *call_size_time_table;
 
-  /* Predicate on when some loop in the function becomes to have known
-     bounds.   */
-  predicate * GTY((skip)) loop_iterations;
-  /* Predicate on when some loop in the function becomes to have known
-     stride.   */
-  predicate * GTY((skip)) loop_stride;
+  /* Predicates on when some loops in the function can have known bounds.  */
+  vec<ipa_freqcounting_predicate, va_gc> *loop_iterations;
+  /* Predicates on when some loops in the function can have known strides.  */
+  vec<ipa_freqcounting_predicate, va_gc> *loop_strides;
   /* Estimated growth for inlining all copies of the function before start
      of small functions inlining.
      This value will get out of date as the callers are duplicated, but
@@ -316,6 +327,15 @@ public:
   {
     return m_node != NULL;
   }
+
+
+  /* How often loops will have known iterations.  Calculated in
+     estimate_size_and_time.  */
+  sreal m_loops_with_known_iterations;
+  /* How often loops will have known strides.  Calculated in
+     estimate_size_and_time. */
+  sreal m_loops_with_known_strides;
+
 private:
   /* Called function.  */
   cgraph_node *m_node;
@@ -353,7 +373,7 @@ void estimate_ipcp_clone_size_and_time (struct cgraph_node 
*,
                                        vec<ipa_polymorphic_call_context>,
                                        vec<ipa_agg_value_set>,
                                        int *, sreal *, sreal *,
-                                       ipa_hints *);
+                                       ipa_hints *, sreal *, sreal *);
 void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge);
 void ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset = 
true);
 void compute_fn_summary (struct cgraph_node *, bool);
diff --git a/gcc/params.opt b/gcc/params.opt
index f39e5d1a012..2a5f3d61727 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -211,7 +211,7 @@ Common Joined UInteger 
Var(param_ipa_cp_single_call_penalty) Init(15) IntegerRan
 Percentage penalty functions containing a single call to another function will 
receive when they are evaluated for cloning.
 
 -param=ipa-cp-unit-growth=
-Common Joined UInteger Var(param_ipa_cp_unit_growth) Init(10) Param 
Optimization
+Common Joined UInteger Var(param_ipa_cp_unit_growth) Init(80) Param 
Optimization
 How much can given compilation unit grow because of the interprocedural 
constant propagation (in percent).
 
 -param=ipa-cp-value-list-size=

Reply via email to