Hi,
this implements the heuristics increasing bounds for functions having
__builtin_constant_p on parameters.  Note that for get_order this is
still not enough, since we only double the bound if the hint applies, so
it goes from 70 to 140 and not to the 190 needed; however, it will handle
other similar cases.

If the hint weight is increased to 300% (so the bound becomes 210), I get:
hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build10/gcc$ ./xgcc -B ./ -O2 
-Winline pipe.i --param inline-heuristics-hint-percent=300
In file included from fs/pipe.c:11:
./include/linux/slab.h: In function ‘alloc_pipe_info’:
./include/linux/slab.h:586:121: warning: inlining failed in call to 
‘kmalloc_array.constprop’: --param max-inline-insns-single limit reached
[-Winline]
./include/linux/slab.h:605:9: note: called from here
./include/linux/slab.h: In function ‘pipe_resize_ring’:
./include/linux/slab.h:586:121: warning: inlining failed in call to 
‘kmalloc_array.constprop’: --param max-inline-insns-single limit reached 
[-Winline]
./include/linux/slab.h:605:9: note: called from here

So the problem only shifts to not inlining kmalloc_array
(which is why it would be nice to update the kernel with the easier
get_order).

However, it shows a different problem: ipa-cp produces a clone of
kmalloc_array since it is always called with a constant size, but the clone
does not update the predicates, so we lose track of the parameter
being constant, and that is why we optimize it out only late.

Martin, I think this is caused by the long-standing TODO in
ipa_fn_summary_t::duplicate, and we should probably implement it: based
on the known partial assignment of params to constants, we should fold the
conditions in the predicates.

Indeed with ./xgcc -B ./ -O2 -Winline pipe.i  -fno-ipa-cp --param 
inline-heuristics-hint-percent=300
the warning goes away.  We still need the stronger hint though.
gcc/ChangeLog:

2020-10-20  Jan Hubicka  <hubi...@ucw.cz>

        PR c/97445
        * ipa-fnsummary.c (ipa_dump_hints): Handle
        INLINE_HINT_builtin_constant_p.
        (ipa_fn_summary::~ipa_fn_summary): Free builtin_constant_p_parms.
        (ipa_fn_summary_t::duplicate): Copy builtin_constant_p_parms.
        (ipa_dump_fn_summary): Dump builtin_constant_p_parms.
        (set_cond_stmt_execution_predicate): Compute builtin_constant_p_parms.
        (ipa_call_context::estimate_size_and_time): Set
        INLINE_HINT_builtin_constant_p.
        (ipa_merge_fn_summary_after_inlining): Merge builtin_constant_p_parms.
        (inline_read_section): Stream builtin_constant_p_parms.
        (ipa_fn_summary_write): Stream builtin_constant_p_parms.
        * ipa-fnsummary.h (enum ipa_hints_vals): Add
        INLINE_HINT_builtin_constant_p.
        (ipa_fn_summary): Add builtin_constant_p_parms.
        * ipa-inline.c (want_inline_small_function_p): Handle
        INLINE_HINT_builtin_constant_p.
        (edge_badness): Handle INLINE_HINT_builtin_constant_p.

gcc/testsuite/ChangeLog:

2020-10-20  Jan Hubicka  <hubi...@ucw.cz>

        * gcc.dg/ipa/inlinehint-5.c: New test.


diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 9e3eda4d3cb..4292f1f5fe7 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -141,6 +141,11 @@ ipa_dump_hints (FILE *f, ipa_hints hints)
       hints &= ~INLINE_HINT_known_hot;
       fprintf (f, " known_hot");
     }
+  if (hints & INLINE_HINT_builtin_constant_p)
+    {
+      hints &= ~INLINE_HINT_builtin_constant_p;
+      fprintf (f, " builtin_constant_p");
+    }
   gcc_assert (!hints);
 }
 
@@ -751,6 +756,7 @@ ipa_fn_summary::~ipa_fn_summary ()
   vec_free (call_size_time_table);
   vec_free (loop_iterations);
   vec_free (loop_strides);
+  vec_free (builtin_constant_p_parms);
 }
 
 void
@@ -805,6 +811,10 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
      that are known to be false or true.  */
   info->conds = vec_safe_copy (info->conds);
 
+  if (info->builtin_constant_p_parms)
+    info->builtin_constant_p_parms
+        = vec_safe_copy (info->builtin_constant_p_parms);
+
   /* When there are any replacements in the function body, see if we can figure
      out that something was optimized out.  */
   if (ipa_node_params_sum && dst->clone.tree_map)
@@ -1066,6 +1076,13 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
            fprintf (f, " inlinable");
          if (s->fp_expressions)
            fprintf (f, " fp_expression");
+         if (s->builtin_constant_p_parms)
+           {
+             fprintf (f, " builtin_constant_p_parms");
+             for (unsigned int i = 0;
+                  i < s->builtin_constant_p_parms->length (); i++)
+               fprintf (f, " %i", (*s->builtin_constant_p_parms)[i]);
+           }
          fprintf (f, "\n  global time:     %f\n", s->time.to_double ());
          fprintf (f, "  self size:       %i\n", ss->self_size);
          fprintf (f, "  global size:     %i\n", ss->size);
@@ -1598,6 +1615,8 @@ set_cond_stmt_execution_predicate (struct 
ipa_func_body_info *fbi,
   op2 = gimple_call_arg (set_stmt, 0);
   if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
     return;
+  if (!aggpos.by_ref)
+    vec_safe_push (summary->builtin_constant_p_parms, index);
   FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
     {
       predicate p = add_condition (summary, params_summary, index,
@@ -3717,6 +3736,9 @@ ipa_call_context::estimate_size_and_time 
(ipa_call_estimates *estimates,
        hints |= INLINE_HINT_in_scc;
       if (DECL_DECLARED_INLINE_P (m_node->decl))
        hints |= INLINE_HINT_declared_inline;
+      if (info->builtin_constant_p_parms
+         && DECL_DECLARED_INLINE_P (m_node->decl))
+       hints |= INLINE_HINT_builtin_constant_p;
 
       ipa_freqcounting_predicate *fcp;
       for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
@@ -4044,8 +4066,13 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge 
*edge)
          operand_map[i] = map;
          gcc_assert (map < ipa_get_param_count (params_summary));
        }
+      int *ip;
+      for (i = 0; vec_safe_iterate (callee_info->builtin_constant_p_parms,
+          i, &ip); i++)
+       if (*ip < count && operand_map[*ip] > 0)
+         vec_safe_push (info->builtin_constant_p_parms, operand_map[*ip]);
     }
-  sreal freq =  edge->sreal_frequency ();
+  sreal freq = edge->sreal_frequency ();
   for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
     {
       predicate p;
@@ -4443,6 +4470,15 @@ inline_read_section (struct lto_file_decl_data 
*file_data, const char *data,
              vec_safe_push (info->loop_strides, fcp);
            }
        }
+      count2 = streamer_read_uhwi (&ib);
+      if (info && count2)
+       vec_safe_reserve_exact (info->builtin_constant_p_parms, count2);
+      for (j = 0; j < count2; j++)
+       {
+         int parm = streamer_read_uhwi (&ib);
+         if (info)
+           info->builtin_constant_p_parms->quick_push (parm);
+       }
       for (e = node->callees; e; e = e->next_callee)
        read_ipa_call_summary (&ib, e, info != NULL);
       for (e = node->indirect_calls; e; e = e->next_callee)
@@ -4618,6 +4654,13 @@ ipa_fn_summary_write (void)
              fcp->predicate->stream_out (ob);
              fcp->freq.stream_out (ob);
            }
+         streamer_write_uhwi (ob,
+                              vec_safe_length
+                                (info->builtin_constant_p_parms));
+         int *ip;
+         for (i = 0; vec_safe_iterate (info->builtin_constant_p_parms, i, &ip);
+              i++)
+           streamer_write_uhwi (ob, *ip);
          for (edge = cnode->callees; edge; edge = edge->next_callee)
            write_ipa_call_summary (ob, edge);
          for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
index f4dd5b85ab9..4c4a90dd622 100644
--- a/gcc/ipa-fnsummary.h
+++ b/gcc/ipa-fnsummary.h
@@ -49,7 +49,10 @@ enum ipa_hints_vals {
      Set by simple_edge_hints in ipa-inline-analysis.c.   */
   INLINE_HINT_cross_module = 64,
   /* We know that the callee is hot by profile.  */
-  INLINE_HINT_known_hot = 128
+  INLINE_HINT_known_hot = 128,
+  /* There is builtin_constant_p dependent on parameter which is usually
+     a strong hint to inline.  */
+  INLINE_HINT_builtin_constant_p = 256
 };
 
 typedef int ipa_hints;
@@ -123,10 +126,12 @@ public:
   ipa_fn_summary ()
     : min_size (0),
       inlinable (false), single_caller (false),
-      fp_expressions (false), estimated_stack_size (false),
+      fp_expressions (false),
+      estimated_stack_size (false),
       time (0), conds (NULL),
       size_time_table (NULL), call_size_time_table (NULL),
       loop_iterations (NULL), loop_strides (NULL),
+      builtin_constant_p_parms (NULL),
       growth (0), scc_no (0)
   {
   }
@@ -140,6 +145,7 @@ public:
     time (s.time), conds (s.conds), size_time_table (s.size_time_table),
     call_size_time_table (NULL),
     loop_iterations (s.loop_iterations), loop_strides (s.loop_strides),
+    builtin_constant_p_parms (s.builtin_constant_p_parms),
     growth (s.growth), scc_no (s.scc_no)
   {}
 
@@ -182,6 +188,8 @@ public:
   vec<ipa_freqcounting_predicate, va_gc> *loop_iterations;
   /* Predicates on when some loops in the function can have known strides.  */
   vec<ipa_freqcounting_predicate, va_gc> *loop_strides;
+  /* Parameters tested by builtin_constant_p.  */
+  vec<int, va_gc> * GTY((skip)) builtin_constant_p_parms;
   /* Estimated growth for inlining all copies of the function before start
      of small functions inlining.
      This value will get out of date as the callers are duplicated, but
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 225a0140725..99e6002149b 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -878,7 +878,8 @@ want_inline_small_function_p (struct cgraph_edge *e, bool 
report)
       bool apply_hints = (hints & (INLINE_HINT_indirect_call
                                   | INLINE_HINT_known_hot
                                   | INLINE_HINT_loop_iterations
-                                  | INLINE_HINT_loop_stride));
+                                  | INLINE_HINT_loop_stride
+                                  | INLINE_HINT_builtin_constant_p));
 
       if (growth <= opt_for_fn (to->decl,
                                param_max_inline_insns_size))
@@ -1314,7 +1315,8 @@ edge_badness (struct cgraph_edge *edge, bool dump)
     badness = badness.shift (badness > 0 ? 4 : -4);
   if ((hints & (INLINE_HINT_indirect_call
                | INLINE_HINT_loop_iterations
-               | INLINE_HINT_loop_stride))
+               | INLINE_HINT_loop_stride
+               | INLINE_HINT_builtin_constant_p))
       || callee_info->growth <= 0)
     badness = badness.shift (badness > 0 ? -2 : 2);
   if (hints & (INLINE_HINT_same_scc))
diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c 
b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c
new file mode 100644
index 00000000000..3dd3a11dd3e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c
@@ -0,0 +1,33 @@
+/* { dg-options "-O2 -fdump-ipa-inline-details -fno-early-inlining " } */
+/* { dg-add-options bind_pic_locally } */
+int j,k,l;
+int test3(int);
+int test4(int);
+
+static inline int
+test2(int i)
+{
+  if (__builtin_constant_p (i))
+    {
+       switch (i)
+       {
+       case 1: return j;
+       case 2: return k;
+       case 3: return l;
+       }
+    }
+  else return test3(i)+test4(i);
+}
+
+static inline int
+test (int i)
+{
+  return test2(i) + test2(i+1) + test3 (i) + test3(i) + test3(i);
+}
+
+int
+run (int i)
+{
+   return test (i);
+}
+/* { dg-final { scan-ipa-dump-times "hints: declared_inline 
builtin_constant_p" 3 "inline"  } } */

Reply via email to