Hi all,

This is an early draft I'm working on to add fegetround, feclearexcept
and feraiseexcept as builtins on rs6000.  This is my first patch, so I
welcome any and all feedback.  Foremost, I have some questions to ask, as
I got stuck on some problems.


Q1) How to implement a target specific builtin for a C standard
    function?

More specifically, how to make gcc use a rs6000 builtin for a
standard C function? Right now, I am getting a double definition of the
builtin.  I don't know if "define" is the right word for it; maybe
"register an implementation"?

The context is that I am creating builtin optimizations for fegetround,
feclearexcept and feraiseexcept.  Early on I discovered that there is
a file that defines builtins for the whole C library but does not
actually implement them (gcc/builtins.def), and trying to redefine them
in gcc/config/rs6000/rs6000-builtin.def ends up with a name clash.  So I
implemented the builtins with a suffix in their names and pushed this
problem for later...  And this later time is now.

I tried my best to find something about it in the GCC internals
documentation, but I may have missed it.

So this is my question: how do I link the builtin defined in
gcc/builtins.def to my implementation on rs6000? If someone has a
pointer about it or a patch that does it for some other C function (in
any target architecture), that would be great.


Q2) How to fall back to the default behavior of the function call when
    the builtin is not suitable for the parameters?

Here, it is more specifically for feclearexcept and feraiseexcept.  The
builtin should only be used when the argument is a constant with exactly
one bit set (i.e. a mask for a single exception).  Right now, I perform
the check correctly and it works (I validate the builtins using a name
suffix to avoid the problem mentioned in Q1), but it aborts when the
input is not valid instead of falling back to a function call.


Q3) Are the implementations for the builtins more or less on the
    right places?

The first one I did was fegetround, and I based it on ppc_get_timebase
and other related builtins, so I used a define_expand in rs6000.md.  But
when I was working on the fe*except builtins I based them on other
builtins and ended up implementing it all in rs6000-call.c, and I am not
sure if there is a canonical way of doing it one way or the other.


o/
Raoni Fassina Firmino

---- 8< ----

These optimizations were originally in glibc, but were removed,
and it was suggested that they would be a good fit as gcc builtins[1].

The associated bugreport: PR target/94193

[1] https://sourceware.org/legacy-ml/libc-alpha/2020-03/msg00047.html
    https://sourceware.org/legacy-ml/libc-alpha/2020-03/msg00080.html

Signed-off-by: Raoni Fassina Firmino <ra...@linux.ibm.com>
---
 gcc/config/rs6000/rs6000-builtin.def | 13 ++++++
 gcc/config/rs6000/rs6000-call.c      | 69 ++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000.md          | 18 ++++++++
 3 files changed, 100 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 54f750c8384..d5ca15141b1 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2567,12 +2567,25 @@ BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, 
"__builtin_ppc_get_timebase",
 BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb",
              RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
+BU_SPECIAL_X (RS6000_BUILTIN_FEGETROUND, "__builtin_fegetround",
+             RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
+
 BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs",
              RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
 BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl",
              RS6000_BTM_ALWAYS, RS6000_BTC_MISC)
 
+RS6000_BUILTIN_X (RS6000_BUILTIN_FECLEAREXCEPT, "__builtin_feclearexcept",
+                 RS6000_BTM_ALWAYS,
+                 RS6000_BTC_MISC | RS6000_BTC_UNARY,
+                 CODE_FOR_nothing)
+
+RS6000_BUILTIN_X (RS6000_BUILTIN_FERAISEEXCEPT, "__builtin_feraiseexcept",
+                 RS6000_BTM_ALWAYS,
+                 RS6000_BTC_MISC | RS6000_BTC_UNARY,
+                 CODE_FOR_nothing)
+
 RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf",
                  RS6000_BTM_ALWAYS,
                  RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID,
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 7621d6f5278..af93259e73d 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -8533,6 +8533,53 @@ rs6000_expand_zeroop_builtin (enum insn_code icode, rtx 
target)
 }
 
 
+/* Expand __builtin_feclearexcept or __builtin_feraiseexcept (ICODE selects
+   mtfsb0 or mtfsb1) for a single constant exception bit; TARGET gets 0.  */
+static rtx
+rs6000_expand_feCRexcept_builtin (enum insn_code icode, tree exp, rtx target)
+{
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+    {
+      error ("%<__builtin_feclearexcept%> and "
+            "%<__builtin_feraiseexcept%> not supported with "
+            "%<-msoft-float%>");
+      return const0_rtx;
+    }
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node)
+    return const0_rtx;
+
+  /* One bit of the low 32 must be set; 0x20000000 (FE_INVALID) is refused.  */
+  rtx op0 = expand_normal (arg0);
+  unsigned HOST_WIDE_INT mask = CONST_INT_P (op0) ? UINTVAL (op0) : 0;
+  mask &= 0xffffffff;
+  if (!pow2p_hwi (mask) || mask == 0x20000000)
+    {
+      error ("argument 1 must be a constant representing one valid "
+            "exception number");
+      return const0_rtx;
+    }
+
+  /* Operand is the number of leading zeros of the 32-bit mask.  */
+  rtx pat = GEN_FCN (icode) (GEN_INT (31 - exact_log2 (mask)));
+  if (!pat)
+    return const0_rtx;
+  emit_insn (pat);
+
+  if (target == 0 || GET_MODE (target) != SImode)
+    target = gen_reg_rtx (SImode);
+  emit_move_insn (target, const0_rtx);
+  return target;
+}
+
+
 static rtx
 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
 {
@@ -11646,6 +11693,15 @@ rs6000_expand_builtin (tree exp, rtx target, rtx 
subtarget ATTRIBUTE_UNUSED,
         rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
                                             exp);
 
+    case RS6000_BUILTIN_FEGETROUND:
+      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_fegetround, target);
+
+    case RS6000_BUILTIN_FECLEAREXCEPT:
+      return rs6000_expand_feCRexcept_builtin (CODE_FOR_rs6000_mtfsb0, exp, 
target);
+
+    case RS6000_BUILTIN_FERAISEEXCEPT:
+      return rs6000_expand_feCRexcept_builtin (CODE_FOR_rs6000_mtfsb1, exp, 
target);
+
     case RS6000_BUILTIN_MFFSL:
       return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
 
@@ -12029,6 +12085,19 @@ rs6000_init_builtins (void)
                                      NULL_TREE);
   def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
 
+  ftype = build_function_type_list (intSI_type_node, NULL_TREE);
+  def_builtin ("__builtin_fegetround", ftype, RS6000_BUILTIN_FEGETROUND);
+
+  ftype = build_function_type_list (intSI_type_node,
+                                   intSI_type_node,
+                                   NULL_TREE);
+  def_builtin ("__builtin_feclearexcept", ftype, RS6000_BUILTIN_FECLEAREXCEPT);
+
+  ftype = build_function_type_list (intSI_type_node,
+                                   intSI_type_node,
+                                   NULL_TREE);
+  def_builtin ("__builtin_feraiseexcept", ftype, RS6000_BUILTIN_FERAISEEXCEPT);
+
   ftype = build_function_type_list (double_type_node, NULL_TREE);
   def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6173994797c..f935e7118ef 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -13600,6 +13600,24 @@
     return "mftb %0";
 })
 
+
+;; int __builtin_fegetround (): read the FPSCR with mffsl and return its
+;; two low-order bits (mask 0x3) in operand 0.
+(define_expand "rs6000_fegetround"
+  [(use (match_operand:SI 0 "gpc_reg_operand"))]
+  "TARGET_HARD_FLOAT"
+{
+    rtx tmp_df = gen_reg_rtx (DFmode);
+    emit_insn (gen_rs6000_mffsl (tmp_df));
+
+    rtx tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0);
+    rtx tmp_di_2 = gen_reg_rtx (DImode);
+    emit_insn (gen_anddi3 (tmp_di_2, tmp_di, GEN_INT (0x3LL)));
+    /* gen_lowpart picks the low word on either endianness.  */
+    emit_move_insn (operands[0], gen_lowpart (SImode, tmp_di_2));
+    DONE;
+})
+
 
 ;; The ISA 3.0 mffsl instruction is a lower latency instruction
 ;; for reading bits [29:31], [45:51] and [56:63] of the FPSCR.
-- 
2.26.2

Reply via email to