> On Thu, 21 Apr 2016, Jan Hubicka wrote:
> 
> > Hi,
> > this patch implements the long promised logic to inline across -ffast-math
> > boundary when eitehr caller or callee has no fp operations in it.  This is
> > needed to resolve code quality regression on Firefox with LTO where
> > -O3/-O2/-Ofast flags are combined and we fail to inline a lot of comdats
> > otherwise.
> > 
> > Bootstrapped/regtested x86_64-linux. Ricahrd, I would like to know your 
> > opinion
> > on fp_expression_p predicate - it is bit ugly but I do not know how to 
> > implement
> > it better.
> > 
> > We still won't inline -O1 code into -O2+ because flag_strict_overflow 
> > differs.
> > I will implement similar logic for overflows incrementally. Similarly 
> > flag_errno_math
> > can be handled better, but I am not sure it matters - I think wast majority 
> > of time
> > users set errno_math in sync with other -ffast-math implied flags.
> 
> Note that for reasons PR70586 shows (const functions having possible
> trapping side-effect because of FP math or division) we'd like to
> have sth like "uses FP math" "uses possibly trapping integer math"
> "uses integer math with undefined overflow" on a per function level
> and propagated alongside pure/const/nothrow state.
> 
> So maybe you can fit that into a more suitable place than just the
> inliner (which of course is interested in "uses FP math locally",
> not the transitive answer we need for PR70586).

We don't really have much more suitable place - ipa-inline-analysis is 
doing most of the analysis of function body that is usefull for IPA passes,
not only for inliner. It should be renamed perhaps to something like
function_body_summary.  I will do that later this stage1.

For PR70686 in addition to transitive answer we will need to know that
the transformation is win. Const function may take a lot of time and
introducing new call on code path that did not used it previously is
bad idea unless we know that the function is very cheap (which may
be true only for fast builtins, I don't know)

This patch impleemnts the suggested check for presence of FP parameters.
We can play with special casing the moves incrementally.

Bootstrapped/regtested x86_64-linux.

Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog (revision 235765)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,7 @@
+2016-05-02  Jan Hubicka  <hubi...@ucw.cz>
+
+       * gcc.dg/ipa/inline-8.c: New testcase.
+
 2016-05-02  Jakub Jelinek  <ja...@redhat.com>
 
        PR rtl-optimization/70467
Index: testsuite/gcc.dg/ipa/inline-8.c
===================================================================
--- testsuite/gcc.dg/ipa/inline-8.c     (revision 0)
+++ testsuite/gcc.dg/ipa/inline-8.c     (working copy)
@@ -0,0 +1,36 @@
+/* Verify that we do not inline isnanf test info -ffast-math code but that we
+   do inline trivial functions across -Ofast boundary.  */
+/* { dg-do run } */
+/* { dg-options "-O2"  } */
+#include <math.h>
+/* Can't be inlined because isnanf will be optimized out.  */
+int
+cmp (float a)
+{
+  return isnanf (a);
+}
+/* Can be inlined.  */
+int
+move (int a)
+{
+  return a;
+}
+float a;
+void
+set ()
+{
+ a=nan("");
+}
+float b;
+__attribute__ ((optimize("Ofast")))
+int
+main()
+{
+  b++;
+  if (cmp(a))
+    __builtin_abort ();
+  float a = move (1);
+  if (!__builtin_constant_p (a))
+    __builtin_abort ();
+  return 0;
+}
Index: ChangeLog
===================================================================
--- ChangeLog   (revision 235765)
+++ ChangeLog   (working copy)
@@ -1,3 +1,18 @@
+2016-05-02  Jan Hubicka  <hubi...@ucw.cz>
+
+       * ipa-inline-analysis.c (reset_inline_summary): Clear fp_expressions
+       (dump_inline_summary): Dump it.
+       (fp_expression_p): New predicate.
+       (estimate_function_body_sizes): Use it.
+       (inline_merge_summary): Merge fp_expressions.
+       (inline_read_section): Read fp_expressions.
+       (inline_write_summary): Write fp_expressions.
+       * ipa-inline.c (can_inline_edge_p): Permit inlining across fp math
+       codegen boundary if either caller or callee is !fp_expressions.
+       * ipa-inline.h (inline_summary): Add fp_expressions.
+       * ipa-inline-transform.c (inline_call): When inlining !fp_expressions
+       to fp_expressions be sure the fp generation flags are updated.
+
 2016-05-02  Jakub Jelinek  <ja...@redhat.com>
 
        PR rtl-optimization/70467
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c       (revision 235693)
+++ ipa-inline-analysis.c       (working copy)
@@ -850,7 +850,8 @@ evaluate_conditions_for_known_args (stru
          if (known_aggs.exists ())
            {
              agg = known_aggs[c->operand_num];
-             val = ipa_find_agg_cst_for_param (agg, c->offset, c->by_ref);
+             val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
+                                               c->offset, c->by_ref);
            }
          else
            val = NULL_TREE;
@@ -1069,6 +1070,7 @@ reset_inline_summary (struct cgraph_node
     reset_inline_edge_summary (e);
   for (e = node->indirect_calls; e; e = e->next_callee)
     reset_inline_edge_summary (e);
+  info->fp_expressions = false;
 }
 
 /* Hook that is called by cgraph.c when a node is removed.  */
@@ -1423,6 +1425,8 @@ dump_inline_summary (FILE *f, struct cgr
        fprintf (f, " inlinable");
       if (s->contains_cilk_spawn)
        fprintf (f, " contains_cilk_spawn");
+      if (s->fp_expressions)
+       fprintf (f, " fp_expression");
       fprintf (f, "\n  self time:       %i\n", s->self_time);
       fprintf (f, "  global time:     %i\n", s->time);
       fprintf (f, "  self size:       %i\n", s->self_size);
@@ -2459,6 +2463,21 @@ clobber_only_eh_bb_p (basic_block bb, bo
   return true;
 }
 
+/* Return true if STMT compute a floating point expression that may be affected
+   by -ffast-math and similar flags.  */
+
+static bool
+fp_expression_p (gimple *stmt)
+{
+  ssa_op_iter i;
+  tree op;
+
+  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
+    if (FLOAT_TYPE_P (TREE_TYPE (op)))
+      return true;
+  return false;
+}
+
 /* Compute function body size parameters for NODE.
    When EARLY is true, we compute only simple summaries without
    non-trivial predicates to drive the early inliner.  */
@@ -2733,6 +2752,13 @@ estimate_function_body_sizes (struct cgr
                                       this_time * (2 - prob), &p);
                }
 
+             if (!info->fp_expressions && fp_expression_p (stmt))
+               {
+                 info->fp_expressions = true;
+                 if (dump_file)
+                   fprintf (dump_file, "   fp_expression set\n");
+               }
+
              gcc_assert (time >= 0);
              gcc_assert (size >= 0);
            }
@@ -3577,6 +3603,8 @@ inline_merge_summary (struct cgraph_edge
   else
     toplev_predicate = true_predicate ();
 
+  info->fp_expressions |= callee_info->fp_expressions;
+
   if (callee_info->conds)
     evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL);
   if (ipa_node_params_sum && callee_info->conds)
@@ -4229,6 +4257,7 @@ inline_read_section (struct lto_file_dec
       bp = streamer_read_bitpack (&ib);
       info->inlinable = bp_unpack_value (&bp, 1);
       info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
+      info->fp_expressions = bp_unpack_value (&bp, 1);
 
       count2 = streamer_read_uhwi (&ib);
       gcc_assert (!info->conds);
@@ -4395,6 +4424,7 @@ inline_write_summary (void)
          bp = bitpack_create (ob->main_stream);
          bp_pack_value (&bp, info->inlinable, 1);
          bp_pack_value (&bp, info->contains_cilk_spawn, 1);
+         bp_pack_value (&bp, info->fp_expressions, 1);
          streamer_write_bitpack (&bp);
          streamer_write_uhwi (ob, vec_safe_length (info->conds));
          for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
Index: ipa-inline.c
===================================================================
--- ipa-inline.c        (revision 235693)
+++ ipa-inline.c        (working copy)
@@ -338,7 +338,7 @@ can_inline_edge_p (struct cgraph_edge *e
   else if (e->call_stmt_cannot_inline_p)
     {
       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
-        e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
+        e->inline_failed = e->caller->thunk.thunk_p ? CIF_THUNK : 
CIF_MISMATCHED_ARGUMENTS;
       inlinable = false;
     }
   /* Don't inline if the functions have different EH personalities.  */
@@ -396,6 +396,8 @@ can_inline_edge_p (struct cgraph_edge *e
             (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
              && lookup_attribute ("always_inline",
                                   DECL_ATTRIBUTES (callee->decl)));
+      inline_summary *caller_info = inline_summaries->get (caller);
+      inline_summary *callee_info = inline_summaries->get (callee);
 
      /* Until GCC 4.9 we did not check the semantics alterning flags
        bellow and inline across optimization boundry.
@@ -417,16 +419,21 @@ can_inline_edge_p (struct cgraph_edge *e
                   == !opt_for_fn (callee->decl, optimize) || !always_inline))
              || check_match (flag_wrapv)
              || check_match (flag_trapv)
-             /* Strictly speaking only when the callee uses FP math.  */
-             || check_maybe_up (flag_rounding_math)
-             || check_maybe_up (flag_trapping_math)
-             || check_maybe_down (flag_unsafe_math_optimizations)
-             || check_maybe_down (flag_finite_math_only)
-             || check_maybe_up (flag_signaling_nans)
-             || check_maybe_down (flag_cx_limited_range)
-             || check_maybe_up (flag_signed_zeros)
-             || check_maybe_down (flag_associative_math)
-             || check_maybe_down (flag_reciprocal_math)
+             /* When caller or callee does FP math, be sure FP codegen flags
+                compatible.  */
+             || ((caller_info->fp_expressions && callee_info->fp_expressions)
+                 && (check_maybe_up (flag_rounding_math)
+                     || check_maybe_up (flag_trapping_math)
+                     || check_maybe_down (flag_unsafe_math_optimizations)
+                     || check_maybe_down (flag_finite_math_only)
+                     || check_maybe_up (flag_signaling_nans)
+                     || check_maybe_down (flag_cx_limited_range)
+                     || check_maybe_up (flag_signed_zeros)
+                     || check_maybe_down (flag_associative_math)
+                     || check_maybe_down (flag_reciprocal_math)
+                     /* Strictly speaking only when the callee contains 
function
+                        calls that may end up setting errno.  */
+                     || check_maybe_up (flag_errno_math)))
              /* We do not want to make code compiled with exceptions to be
                 brought into a non-EH function unless we know that the callee
                 does not throw.
@@ -435,9 +442,6 @@ can_inline_edge_p (struct cgraph_edge *e
                  && DECL_FUNCTION_PERSONALITY (callee->decl))
              || (check_maybe_up (flag_exceptions)
                  && DECL_FUNCTION_PERSONALITY (callee->decl))
-             /* Strictly speaking only when the callee contains function
-                calls that may end up setting errno.  */
-             || check_maybe_up (flag_errno_math)
              /* When devirtualization is diabled for callee, it is not safe
                 to inline it as we possibly mangled the type info.
                 Allow early inlining of always inlines.  */
Index: ipa-inline.h
===================================================================
--- ipa-inline.h        (revision 235693)
+++ ipa-inline.h        (working copy)
@@ -132,6 +132,8 @@ struct GTY(()) inline_summary
   /* True wen there is only one caller of the function before small function
      inlining.  */
   unsigned int single_caller : 1;
+  /* True if function contains any floating point expressions.  */
+  unsigned int fp_expressions : 1;
 
   /* Information about function that will result after applying all the
      inline decisions present in the callgraph.  Generally kept up to
Index: ipa-inline-transform.c
===================================================================
--- ipa-inline-transform.c      (revision 235693)
+++ ipa-inline-transform.c      (working copy)
@@ -338,6 +338,63 @@ inline_call (struct cgraph_edge *e, bool
       DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
         = build_optimization_node (&opts);
     }
+  inline_summary *caller_info = inline_summaries->get (to);
+  inline_summary *callee_info = inline_summaries->get (callee);
+  if (!caller_info->fp_expressions && callee_info->fp_expressions)
+    {
+      caller_info->fp_expressions = true;
+      if (opt_for_fn (callee->decl, flag_rounding_math)
+         != opt_for_fn (to->decl, flag_rounding_math)
+         || opt_for_fn (callee->decl, flag_trapping_math)
+            != opt_for_fn (to->decl, flag_trapping_math)
+         || opt_for_fn (callee->decl, flag_unsafe_math_optimizations)
+            != opt_for_fn (to->decl, flag_unsafe_math_optimizations)
+         || opt_for_fn (callee->decl, flag_finite_math_only)
+            != opt_for_fn (to->decl, flag_finite_math_only)
+         || opt_for_fn (callee->decl, flag_signaling_nans)
+            != opt_for_fn (to->decl, flag_signaling_nans)
+         || opt_for_fn (callee->decl, flag_cx_limited_range)
+            != opt_for_fn (to->decl, flag_cx_limited_range)
+         || opt_for_fn (callee->decl, flag_signed_zeros)
+            != opt_for_fn (to->decl, flag_signed_zeros)
+         || opt_for_fn (callee->decl, flag_associative_math)
+            != opt_for_fn (to->decl, flag_associative_math)
+         || opt_for_fn (callee->decl, flag_reciprocal_math)
+            != opt_for_fn (to->decl, flag_reciprocal_math)
+         || opt_for_fn (callee->decl, flag_errno_math)
+            != opt_for_fn (to->decl, flag_errno_math))
+       {
+         struct gcc_options opts = global_options;
+
+         cl_optimization_restore (&opts, opts_for_fn (to->decl));
+         opts.x_flag_rounding_math
+           = opt_for_fn (callee->decl, flag_rounding_math);
+         opts.x_flag_trapping_math
+           = opt_for_fn (callee->decl, flag_trapping_math);
+         opts.x_flag_unsafe_math_optimizations
+           = opt_for_fn (callee->decl, flag_unsafe_math_optimizations);
+         opts.x_flag_finite_math_only
+           = opt_for_fn (callee->decl, flag_finite_math_only);
+         opts.x_flag_signaling_nans
+           = opt_for_fn (callee->decl, flag_signaling_nans);
+         opts.x_flag_cx_limited_range
+           = opt_for_fn (callee->decl, flag_cx_limited_range);
+         opts.x_flag_signed_zeros
+           = opt_for_fn (callee->decl, flag_signed_zeros);
+         opts.x_flag_associative_math
+           = opt_for_fn (callee->decl, flag_associative_math);
+         opts.x_flag_reciprocal_math
+           = opt_for_fn (callee->decl, flag_reciprocal_math);
+         opts.x_flag_errno_math
+           = opt_for_fn (callee->decl, flag_errno_math);
+         if (dump_file)
+           fprintf (dump_file, "Copying FP flags from %s:%i to %s:%i\n",
+                    callee->name (), callee->order, to->name (), to->order);
+         build_optimization_node (&opts);
+         DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
+            = build_optimization_node (&opts);
+       }
+    }
 
   /* If aliases are involved, redirect edge to the actual destination and
      possibly remove the aliases.  */

Reply via email to