The PowerPC port provides reciprocal sqrt but doesn't implement the
extra incantation to utilize it for sqrtf.

The current implementation re-associates terms in the N-R iteration to
utilize one constant instead of two, but does not provide a
pre-computed estimate multiplied by the source, which requires an
extra multiply at the end.  The cost of the extra load of an FP
constant through the LSU and a register to hold it seems to balance
against the cost of the extra multiply in the VSU, so it's not clear
that re-arranging the computation is beneficial.

Thanks, David

PR target/68609
* config/rs6000/rs6000-protos.h (rs6000_emit_swsqrt): Rename and add
bool argument.
* config/rs6000/rs6000.c (rs6000_emit_swsqrt): Rename. Add non-reciprocal path.
* config/rs6000/rs6000.md (rsqrt<mode>2): Call new function name.
(sqrt<mode>2): Replace define_insn with define_expand that can call
rs6000_emit_swsqrt.

Index: rs6000-protos.h
===================================================================
--- rs6000-protos.h     (revision 231169)
+++ rs6000-protos.h     (working copy)
@@ -137,7 +137,7 @@
 extern void rs6000_expand_atomic_exchange (rtx op[]);
 extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 extern void rs6000_emit_swdiv (rtx, rtx, rtx, bool);
-extern void rs6000_emit_swrsqrt (rtx, rtx);
+extern void rs6000_emit_swsqrt (rtx, rtx, bool);
 extern void output_toc (FILE *, rtx, int, machine_mode);
 extern rtx rs6000_longcall_ref (rtx);
 extern void rs6000_fatal_bad_address (rtx);
Index: rs6000.c
===================================================================
--- rs6000.c    (revision 231169)
+++ rs6000.c    (working copy)
@@ -32889,7 +32889,7 @@ rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool not
    rsqrt.  Assumes no trapping math and finite arguments.  */

 void
-rs6000_emit_swrsqrt (rtx dst, rtx src)
+rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
 {
   machine_mode mode = GET_MODE (src);
   rtx x0 = gen_reg_rtx (mode);
@@ -32922,6 +32922,16 @@ void
   emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
                                              UNSPEC_RSQRT)));

+  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
+  if (!recip)
+    {
+      rtx zero = force_reg (mode, CONST0_RTX (mode));
+      rtx target = emit_conditional_move (x0, GT, src, zero, mode,
+                                         x0, zero, mode, 0);
+      if (target != x0)
+       emit_move_insn (x0, target);
+    }
+
   /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
   rs6000_emit_msub (y, src, halfthree, src);

@@ -32938,7 +32948,11 @@ void
       x0 = x1;
     }

-  emit_move_insn (dst, x0);
+  if (!recip)
+    emit_insn (gen_mul (dst, src, x0));
+  else
+    emit_move_insn (dst, x0);
+
   return;
 }
Index: rs6000.md
===================================================================
--- rs6000.md   (revision 231169)
+++ rs6000.md   (working copy)
@@ -4301,7 +4301,7 @@
    (match_operand:RECIPF 1 "gpc_reg_operand" "")]
   "RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)"
 {
-  rs6000_emit_swrsqrt (operands[0], operands[1]);
+  rs6000_emit_swsqrt (operands[0], operands[1], 1);
   DONE;
 })
 ^L
@@ -4426,7 +4426,7 @@
   [(set_attr "type" "<Fs>div")
    (set_attr "fp_type" "fp_div_<Fs>")])

-(define_insn "sqrt<mode>2"
+(define_insn "*sqrt<mode>2_internal"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>")
        (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>")))]
   "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU
@@ -4437,6 +4437,23 @@
   [(set_attr "type" "<Fs>sqrt")
    (set_attr "fp_type" "fp_sqrt_<Fs>")])

+(define_expand "sqrt<mode>2"
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+       (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))]
+  "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU
+   && (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))"
+{
+  if (<MODE>mode == SFmode
+      && RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)
+      && !optimize_function_for_size_p (cfun)
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      rs6000_emit_swsqrt (operands[0], operands[1], 0);
+      DONE;
+    }
+})
+
 ;; Floating point reciprocal approximation
 (define_insn "fre<Fs>"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>")

Reply via email to