Hi, this implements the heuristics increasing bounds for functions having __builtin_constant_p on parameters. Note that for get_order this is still not enough, since we only double the bound if the hint applies, so it goes from 70 to 140 and not to the 190 needed; however, it will handle other similar cases.
If the hint weight is increased to 300% (so the bound becomes 210), I get: hubicka@lomikamen-jh:/aux/hubicka/trunk-git/build10/gcc$ ./xgcc -B ./ -O2 -Winline pipe.i --param inline-heuristics-hint-percent=300 In file included from fs/pipe.c:11: ./include/linux/slab.h: In function ‘alloc_pipe_info’: ./include/linux/slab.h:586:121: warning: inlining failed in call to ‘kmalloc_array.constprop’: --param max-inline-insns-single limit reached [-Winline] ./include/linux/slab.h:605:9: note: called from here ./include/linux/slab.h: In function ‘pipe_resize_ring’: ./include/linux/slab.h:586:121: warning: inlining failed in call to ‘kmalloc_array.constprop’: --param max-inline-insns-single limit reached [-Winline] ./include/linux/slab.h:605:9: note: called from here So the problem only shifts to not inlining kmalloc_array (that is why it would be nice to update the kernel with the easier get_order). However, it shows a different problem: ipa-cp produces a clone of kmalloc_array since it is always used with a constant size, but the clone does not update the predicates, so we lose track of the parameter being constant, and that is why we optimize it out only late. Martin, I think this is caused by the long-standing TODO in ipa_fn_summary_t::duplicate and we should probably implement it: based on the known partial assignment of params to constants we should fold the conditions in predicates. Indeed, with ./xgcc -B ./ -O2 -Winline pipe.i -fno-ipa-cp --param inline-heuristics-hint-percent=300 the warning goes away. We still need the stronger hint though.
gcc/ChangeLog: 2020-10-20 Jan Hubicka <hubi...@ucw.cz> PR c/97445 * ipa-fnsummary.c (ipa_dump_hints): Handle INLINE_HINT_builtin_constant_p. (ipa_fn_summary::~ipa_fn_summary): Free builtin_constant_p_parms. (ipa_fn_summary_t::duplicate): Copy builtin_constant_p_parms. (ipa_dump_fn_summary): Dump builtin_constant_p_parms. (set_cond_stmt_execution_predicate): Compute builtin_constant_p_parms. (ipa_call_context::estimate_size_and_time): Set INLINE_HINT_builtin_constant_p. (ipa_merge_fn_summary_after_inlining): Merge builtin_constant_p_parms. (inline_read_section): Stream builtin_constant_p_parms. (ipa_fn_summary_write): Stream builtin_constant_p_parms. * ipa-fnsummary.h (enum ipa_hints_vals): Add INLINE_HINT_builtin_constant_p. (ipa_fn_summary): Add builtin_constant_p_parms. * ipa-inline.c (want_inline_small_function_p): Handle INLINE_HINT_builtin_constant_p. (edge_badness): Handle INLINE_HINT_builtin_constant_p. gcc/testsuite/ChangeLog: 2020-10-20 Jan Hubicka <hubi...@ucw.cz> * gcc.dg/ipa/inlinehint-5.c: New test. diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 9e3eda4d3cb..4292f1f5fe7 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -141,6 +141,11 @@ ipa_dump_hints (FILE *f, ipa_hints hints) hints &= ~INLINE_HINT_known_hot; fprintf (f, " known_hot"); } + if (hints & INLINE_HINT_builtin_constant_p) + { + hints &= ~INLINE_HINT_builtin_constant_p; + fprintf (f, " builtin_constant_p"); + } gcc_assert (!hints); } @@ -751,6 +756,7 @@ ipa_fn_summary::~ipa_fn_summary () vec_free (call_size_time_table); vec_free (loop_iterations); vec_free (loop_strides); + vec_free (builtin_constant_p_parms); } void @@ -805,6 +811,10 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, that are known to be false or true. 
*/ info->conds = vec_safe_copy (info->conds); + if (info->builtin_constant_p_parms) + info->builtin_constant_p_parms + = vec_safe_copy (info->builtin_constant_p_parms); + /* When there are any replacements in the function body, see if we can figure out that something was optimized out. */ if (ipa_node_params_sum && dst->clone.tree_map) @@ -1066,6 +1076,13 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node) fprintf (f, " inlinable"); if (s->fp_expressions) fprintf (f, " fp_expression"); + if (s->builtin_constant_p_parms) + { + fprintf (f, " builtin_constant_p_parms"); + for (unsigned int i = 0; + i < s->builtin_constant_p_parms->length (); i++) + fprintf (f, " %i", (*s->builtin_constant_p_parms)[i]); + } fprintf (f, "\n global time: %f\n", s->time.to_double ()); fprintf (f, " self size: %i\n", ss->self_size); fprintf (f, " global size: %i\n", ss->size); @@ -1598,6 +1615,8 @@ set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi, op2 = gimple_call_arg (set_stmt, 0); if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos)) return; + if (!aggpos.by_ref) + vec_safe_push (summary->builtin_constant_p_parms, index); FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE) { predicate p = add_condition (summary, params_summary, index, @@ -3717,6 +3736,9 @@ ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates, hints |= INLINE_HINT_in_scc; if (DECL_DECLARED_INLINE_P (m_node->decl)) hints |= INLINE_HINT_declared_inline; + if (info->builtin_constant_p_parms + && DECL_DECLARED_INLINE_P (m_node->decl)) + hints |= INLINE_HINT_builtin_constant_p; ipa_freqcounting_predicate *fcp; for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++) @@ -4044,8 +4066,13 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) operand_map[i] = map; gcc_assert (map < ipa_get_param_count (params_summary)); } + int *ip; + for (i = 0; vec_safe_iterate (callee_info->builtin_constant_p_parms, + i, &ip); i++) + if (*ip < 
count && operand_map[*ip] > 0) + vec_safe_push (info->builtin_constant_p_parms, operand_map[*ip]); } - sreal freq = edge->sreal_frequency (); + sreal freq = edge->sreal_frequency (); for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++) { predicate p; @@ -4443,6 +4470,15 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data, vec_safe_push (info->loop_strides, fcp); } } + count2 = streamer_read_uhwi (&ib); + if (info && count2) + vec_safe_reserve_exact (info->builtin_constant_p_parms, count2); + for (j = 0; j < count2; j++) + { + int parm = streamer_read_uhwi (&ib); + if (info) + info->builtin_constant_p_parms->quick_push (parm); + } for (e = node->callees; e; e = e->next_callee) read_ipa_call_summary (&ib, e, info != NULL); for (e = node->indirect_calls; e; e = e->next_callee) @@ -4618,6 +4654,13 @@ ipa_fn_summary_write (void) fcp->predicate->stream_out (ob); fcp->freq.stream_out (ob); } + streamer_write_uhwi (ob, + vec_safe_length + (info->builtin_constant_p_parms)); + int *ip; + for (i = 0; vec_safe_iterate (info->builtin_constant_p_parms, i, &ip); + i++) + streamer_write_uhwi (ob, *ip); for (edge = cnode->callees; edge; edge = edge->next_callee) write_ipa_call_summary (ob, edge); for (edge = cnode->indirect_calls; edge; edge = edge->next_callee) diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index f4dd5b85ab9..4c4a90dd622 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -49,7 +49,10 @@ enum ipa_hints_vals { Set by simple_edge_hints in ipa-inline-analysis.c. */ INLINE_HINT_cross_module = 64, /* We know that the callee is hot by profile. */ - INLINE_HINT_known_hot = 128 + INLINE_HINT_known_hot = 128, + /* There is builtin_constant_p dependent on parameter which is usually + a strong hint to inline. 
*/ + INLINE_HINT_builtin_constant_p = 256 }; typedef int ipa_hints; @@ -123,10 +126,12 @@ public: ipa_fn_summary () : min_size (0), inlinable (false), single_caller (false), - fp_expressions (false), estimated_stack_size (false), + fp_expressions (false), + estimated_stack_size (false), time (0), conds (NULL), size_time_table (NULL), call_size_time_table (NULL), loop_iterations (NULL), loop_strides (NULL), + builtin_constant_p_parms (NULL), growth (0), scc_no (0) { } @@ -140,6 +145,7 @@ public: time (s.time), conds (s.conds), size_time_table (s.size_time_table), call_size_time_table (NULL), loop_iterations (s.loop_iterations), loop_strides (s.loop_strides), + builtin_constant_p_parms (s.builtin_constant_p_parms), growth (s.growth), scc_no (s.scc_no) {} @@ -182,6 +188,8 @@ public: vec<ipa_freqcounting_predicate, va_gc> *loop_iterations; /* Predicates on when some loops in the function can have known strides. */ vec<ipa_freqcounting_predicate, va_gc> *loop_strides; + /* Parameters tested by builtin_constant_p. */ + vec<int, va_gc> * GTY((skip)) builtin_constant_p_parms; /* Estimated growth for inlining all copies of the function before start of small functions inlining. This value will get out of date as the callers are duplicated, but diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 225a0140725..99e6002149b 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -878,7 +878,8 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) bool apply_hints = (hints & (INLINE_HINT_indirect_call | INLINE_HINT_known_hot | INLINE_HINT_loop_iterations - | INLINE_HINT_loop_stride)); + | INLINE_HINT_loop_stride + | INLINE_HINT_builtin_constant_p)); if (growth <= opt_for_fn (to->decl, param_max_inline_insns_size)) @@ -1314,7 +1315,8 @@ edge_badness (struct cgraph_edge *edge, bool dump) badness = badness.shift (badness > 0 ? 
4 : -4); if ((hints & (INLINE_HINT_indirect_call | INLINE_HINT_loop_iterations - | INLINE_HINT_loop_stride)) + | INLINE_HINT_loop_stride + | INLINE_HINT_builtin_constant_p)) || callee_info->growth <= 0) badness = badness.shift (badness > 0 ? -2 : 2); if (hints & (INLINE_HINT_same_scc)) diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c new file mode 100644 index 00000000000..3dd3a11dd3e --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c @@ -0,0 +1,33 @@ +/* { dg-options "-O2 -fdump-ipa-inline-details -fno-early-inlining " } */ +/* { dg-add-options bind_pic_locally } */ +int j,k,l; +int test3(int); +int test4(int); + +static inline int +test2(int i) +{ + if (__builtin_constant_p (i)) + { + switch (i) + { + case 1: return j; + case 2: return k; + case 3: return l; + } + } + else return test3(i)+test4(i); +} + +static inline int +test (int i) +{ + return test2(i) + test2(i+1) + test3 (i) + test3(i) + test3(i); +} + +int +run (int i) +{ + return test (i); +} +/* { dg-final { scan-ipa-dump-times "hints: declared_inline builtin_constant_p" 3 "inline" } } */