Hello,

as discussed in the PR, this seems like a simple enough approach to handle FENV functionality safely, while keeping it possible to implement optimizations in the future.

Some key missing things:
- handle C, not just C++ (I don't care, but some people probably do)
- handle vectors (for complex types, I don't know what the right semantics would be)

Then flag_trapping_math should also enable this path, which means that we should stop enabling -ftrapping-math by default, or performance will suffer.

Nice to have:
- parse the fenv_access pragma and make it set flag_rounding_math or similar.
- sqrt

All the optimizations can come later (I count having different functions for flag_rounding_math and flag_trapping_math as one such optimization).


I put the lowering in its own pass, because it needs to run at -O0 and there aren't that many passes at -O0 where I could put it. It would probably be better to handle this directly during expansion, but with my current knowledge of the compiler it was easier to lower it beforehand, in a separate GIMPLE pass.

This patch passes bootstrap+regtest on x86_64. I expect it may break a few testcases on some targets (arm?) that check that we optimize some things even with -frounding-math, but as far as I am concerned those do not count as regressions because -frounding-math was never really implemented, so I would encourage target maintainers to xfail those for now.

I'd like to handle this incrementally, rather than wait for a mega-patch that does everything, if that's ok. For instance, I didn't handle vectors in this first patch because the interaction with vector lowering was not completely obvious. Plus it may help get others to implement some parts of it ;-)

2019-06-24  Marc Glisse  <marc.gli...@inria.fr>

        PR middle-end/34678
gcc/cp/
        * typeck.c (cp_build_binary_op): Generate internal functions for float
        operations with -frounding-math.

gcc/
        * Makefile.in: Handle new file gimple-lower-fenv.cc.
        * gimple-lower-fenv.cc: New file.
        * internal-fn.c (expand_FENV_PLUS, expand_FENV_MINUS, expand_FENV_MULT,
        expand_FENV_DIV): New functions.
        * internal-fn.def (FENV_PLUS, FENV_MINUS, FENV_MULT, FENV_DIV): New
        internal functions.
        * passes.def (pass_lower_fenv): New pass.
        * tree-pass.h (make_pass_lower_fenv): Declare new function.

--
Marc Glisse
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 272586)
+++ gcc/Makefile.in	(working copy)
@@ -1315,20 +1315,21 @@ OBJS = \
 	gimple.o \
 	gimple-builder.o \
 	gimple-expr.o \
 	gimple-iterator.o \
 	gimple-fold.o \
 	gimple-laddress.o \
 	gimple-loop-interchange.o \
 	gimple-loop-jam.o \
 	gimple-loop-versioning.o \
 	gimple-low.o \
+	gimple-lower-fenv.o \
 	gimple-pretty-print.o \
 	gimple-ssa-backprop.o \
 	gimple-ssa-evrp.o \
 	gimple-ssa-evrp-analyze.o \
 	gimple-ssa-isolate-paths.o \
 	gimple-ssa-nonnull-compare.o \
 	gimple-ssa-split-paths.o \
 	gimple-ssa-store-merging.o \
 	gimple-ssa-strength-reduction.o \
 	gimple-ssa-sprintf.o \
Index: gcc/cp/typeck.c
===================================================================
--- gcc/cp/typeck.c	(revision 272586)
+++ gcc/cp/typeck.c	(working copy)
@@ -5544,20 +5544,47 @@ cp_build_binary_op (const op_location_t
 	  if (TREE_TYPE (cop0) != orig_type)
 	    cop0 = cp_convert (orig_type, op0, complain);
 	  if (TREE_TYPE (cop1) != orig_type)
 	    cop1 = cp_convert (orig_type, op1, complain);
 	  instrument_expr = ubsan_instrument_division (location, cop0, cop1);
 	}
       else if (doing_shift && sanitize_flags_p (SANITIZE_SHIFT))
 	instrument_expr = ubsan_instrument_shift (location, code, op0, op1);
     }
 
+  // FIXME: vectors (and complex?) as well
+  if (flag_rounding_math && SCALAR_FLOAT_TYPE_P (build_type))
+    {
+      bool do_fenv_subst = true;
+      internal_fn ifn;
+      switch (resultcode)
+	{
+	case PLUS_EXPR:
+	  ifn = IFN_FENV_PLUS;
+	  break;
+	case MINUS_EXPR:
+	  ifn = IFN_FENV_MINUS;
+	  break;
+	case MULT_EXPR:
+	  ifn = IFN_FENV_MULT;
+	  break;
+	case RDIV_EXPR:
+	  ifn = IFN_FENV_DIV;
+	  break;
+	default:
+	  do_fenv_subst = false;
+	}
+      if (do_fenv_subst)
+	return build_call_expr_internal_loc (location, ifn, build_type,
+					     2, op0, op1);
+    }
+
   result = build2_loc (location, resultcode, build_type, op0, op1);
   if (final_type != 0)
     result = cp_convert (final_type, result, complain);
 
   if (instrument_expr != NULL)
     result = build2 (COMPOUND_EXPR, TREE_TYPE (result),
 		     instrument_expr, result);
 
   if (!processing_template_decl)
     {
Index: gcc/gimple-lower-fenv.cc
===================================================================
--- gcc/gimple-lower-fenv.cc	(nonexistent)
+++ gcc/gimple-lower-fenv.cc	(working copy)
@@ -0,0 +1,144 @@
+/* Lower correctly rounded operations.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-iterator.h"
+
+/* Create a pass-through inline asm barrier from IN to OUT.  */
+static gasm*
+asm_barrier (tree out, tree in)
+{
+  vec<tree, va_gc> *inputs = NULL, *outputs = NULL;
+  if (out)
+    {
+      vec_safe_push (inputs,
+		     build_tree_list (build_tree_list
+				      (NULL_TREE, build_string (2, "0")), in));
+      vec_safe_push (outputs,
+		     build_tree_list (build_tree_list
+				      (NULL_TREE, build_string (3, "=g")),
+				      out));
+    }
+  else
+    {
+      vec_safe_push (inputs,
+		     build_tree_list (build_tree_list
+				      (NULL_TREE, build_string (2, "g")), in));
+    }
+  gasm *g = gimple_build_asm_vec ("", inputs, outputs, NULL, NULL);
+  gimple_asm_set_volatile (g, true);
+  if (out)
+    SSA_NAME_DEF_STMT (out) = g;
+  return g;
+}
+
+/* A simple pass that lowers all fenv internal functions.  */
+
+namespace {
+
+const pass_data pass_data_lower_fenv =
+{
+  GIMPLE_PASS, /* type */
+  "lfenv", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_lower_fenv : public gimple_opt_pass
+{
+public:
+  pass_lower_fenv (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_lower_fenv, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual unsigned int execute (function *);
+}; // class pass_lower_fenv
+
+unsigned int
+pass_lower_fenv::execute (function *fun)
+{
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator i;
+      for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
+	{
+	  gimple *stmt = gsi_stmt (i);
+	  if (gimple_code (stmt) != GIMPLE_CALL
+	      || !gimple_call_internal_p (stmt))
+	    continue;
+
+	  tree_code code;
+	  switch (gimple_call_internal_fn (stmt))
+	    {
+	    case IFN_FENV_PLUS:
+	      code = PLUS_EXPR;
+	      break;
+	    case IFN_FENV_MINUS:
+	      code = MINUS_EXPR;
+	      break;
+	    case IFN_FENV_MULT:
+	      code = MULT_EXPR;
+	      break;
+	    case IFN_FENV_DIV:
+	      code = RDIV_EXPR;
+	      break;
+	    default:
+	      continue;
+	    }
+
+	  tree op0 = gimple_call_arg (stmt, 0);
+	  tree op1 = gimple_call_arg (stmt, 1);
+	  tree ftype = TREE_TYPE (op0);
+	  tree newop0 = make_ssa_name (ftype);
+	  tree newop1 = make_ssa_name (ftype);
+	  gsi_insert_before (&i, asm_barrier (newop0, op0), GSI_SAME_STMT);
+	  gsi_insert_before (&i, asm_barrier (newop1, op1), GSI_SAME_STMT);
+
+	  tree lhs = gimple_call_lhs (stmt);
+	  tree newlhs = make_ssa_name (ftype);
+	  gimple *new_stmt = gimple_build_assign (newlhs, code, newop0, newop1);
+	  gsi_insert_before (&i, new_stmt, GSI_SAME_STMT);
+	  gsi_replace (&i, asm_barrier (lhs, newlhs), false);
+	  unlink_stmt_vdef (stmt);
+	  release_ssa_name (gimple_vdef (stmt));
+	}
+    }
+  return 0;
+}
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_lower_fenv (gcc::context *ctxt)
+{
+  return new pass_lower_fenv (ctxt);
+}
Index: gcc/internal-fn.c
===================================================================
--- gcc/internal-fn.c	(revision 272586)
+++ gcc/internal-fn.c	(working copy)
@@ -2869,20 +2869,46 @@ expand_DIVMOD (internal_fn, gcall *call_
 }
 
 /* Expand a NOP.  */
 
 static void
 expand_NOP (internal_fn, gcall *)
 {
   /* Nothing.  But it shouldn't really prevail.  */
 }
 
+/* These should get lowered in the lfenv pass (gimple-lower-fenv.cc).  */
+
+static void
+expand_FENV_PLUS (internal_fn, gcall *)
+{
+  gcc_unreachable ();
+}
+
+static void
+expand_FENV_MINUS (internal_fn, gcall *)
+{
+  gcc_unreachable ();
+}
+
+static void
+expand_FENV_MULT (internal_fn, gcall *)
+{
+  gcc_unreachable ();
+}
+
+static void
+expand_FENV_DIV (internal_fn, gcall *)
+{
+  gcc_unreachable ();
+}
+
 /* Expand a call to FN using the operands in STMT.  FN has a single
    output operand and NARGS input operands.  */
 
 static void
 expand_direct_optab_fn (internal_fn fn, gcall *stmt, direct_optab optab,
 			unsigned int nargs)
 {
   expand_operand *ops = XALLOCAVEC (expand_operand, nargs + 1);
 
   tree_pair types = direct_internal_fn_types (fn, stmt);
Index: gcc/internal-fn.def
===================================================================
--- gcc/internal-fn.def	(revision 272586)
+++ gcc/internal-fn.def	(working copy)
@@ -345,16 +345,22 @@ DEF_INTERNAL_FN (FALLTHROUGH, ECF_LEAF |
 
 /* To implement __builtin_launder.  */
 DEF_INTERNAL_FN (LAUNDER, ECF_LEAF | ECF_NOTHROW | ECF_NOVOPS, NULL)
 
 /* Divmod function.  */
 DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL)
 
 /* A NOP function with arbitrary arguments and return value.  */
 DEF_INTERNAL_FN (NOP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 
+/* Float operations honoring the rounding mode and exception flags.  */
+DEF_INTERNAL_FN (FENV_PLUS, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (FENV_MINUS, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (FENV_MULT, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (FENV_DIV, ECF_LEAF | ECF_NOTHROW, NULL)
+
 #undef DEF_INTERNAL_INT_FN
 #undef DEF_INTERNAL_FLT_FN
 #undef DEF_INTERNAL_FLT_FLOATN_FN
 #undef DEF_INTERNAL_SIGNED_OPTAB_FN
 #undef DEF_INTERNAL_OPTAB_FN
 #undef DEF_INTERNAL_FN
Index: gcc/passes.def
===================================================================
--- gcc/passes.def	(revision 272586)
+++ gcc/passes.def	(working copy)
@@ -377,20 +377,21 @@ along with GCC; see the file COPYING3.
   PUSH_INSERT_PASSES_WITHIN (pass_tm_init)
       NEXT_PASS (pass_tm_mark);
       NEXT_PASS (pass_tm_memopt);
       NEXT_PASS (pass_tm_edges);
   POP_INSERT_PASSES ()
   NEXT_PASS (pass_simduid_cleanup);
   NEXT_PASS (pass_vtable_verify);
   NEXT_PASS (pass_lower_vaarg);
   NEXT_PASS (pass_lower_vector);
   NEXT_PASS (pass_lower_complex_O0);
+  NEXT_PASS (pass_lower_fenv);
   NEXT_PASS (pass_sancov_O0);
   NEXT_PASS (pass_lower_switch_O0);
   NEXT_PASS (pass_asan_O0);
   NEXT_PASS (pass_tsan_O0);
   NEXT_PASS (pass_sanopt);
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(revision 272586)
+++ gcc/tree-pass.h	(working copy)
@@ -617,20 +617,21 @@ extern rtl_opt_pass *make_pass_shorten_b
 extern rtl_opt_pass *make_pass_set_nothrow_function_flags (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_dwarf2_frame (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_final (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_rtl_seqabstr (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_release_ssa_names (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_early_inline (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_lower_fenv (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
 
 extern bool execute_one_pass (opt_pass *);
 extern void execute_pass_list (function *, opt_pass *);
 extern void execute_ipa_pass_list (opt_pass *);
 extern void execute_ipa_summary_passes (ipa_opt_pass_d *);
 extern void execute_all_ipa_transforms (void);
 extern void execute_all_ipa_stmt_fixups (struct cgraph_node *, gimple **);

Reply via email to