Author: bde
Date: Tue Jul 17 10:44:16 2018
New Revision: 336400
URL: https://svnweb.freebsd.org/changeset/base/336400

Log:
  Fix scaling bugs which gave innaccuracies and spurious underflows in csqrt()
  and csqrtl().
  
  When one component is huge and the other is tiny, scaling down the tiny
  component gave spurious underflow.
  
  When both components are denormal, not scaling them up gave inaccuracies
  of 34+ ulps on not very carefully selected args.  Fixing this reduces the
  maximum error to 1.6 ulps on the same set of args (mosly not denormal ones).
  
  The scaling used multiplication of a complex variable by 2, but clang messes
  this on amd64 up by losing the sign of -0.0.  Calculate the components
  separately, as is well known to be needed for operations on more exceptional
  values.

Modified:
  head/lib/msun/src/s_csqrt.c
  head/lib/msun/src/s_csqrtl.c

Modified: head/lib/msun/src/s_csqrt.c
==============================================================================
--- head/lib/msun/src/s_csqrt.c Tue Jul 17 10:27:46 2018        (r336399)
+++ head/lib/msun/src/s_csqrt.c Tue Jul 17 10:44:16 2018        (r336400)
@@ -51,9 +51,7 @@ double complex
 csqrt(double complex z)
 {
        double complex result;
-       double a, b;
-       double t;
-       int scale;
+       double a, b, rx, ry, scale, t;
 
        a = creal(z);
        b = cimag(z);
@@ -86,27 +84,39 @@ csqrt(double complex z)
 
        /* Scale to avoid overflow. */
        if (fabs(a) >= THRESH || fabs(b) >= THRESH) {
-               a *= 0.25;
-               b *= 0.25;
-               scale = 1;
+               /*
+                * Don't scale a or b if this might give (spurious)
+                * underflow.  Then the unscaled value is an equivalent
+                * infinitesmal (or 0).
+                */
+               if (fabs(a) >= 0x1p-1020)
+                       a *= 0.25;
+               if (fabs(b) >= 0x1p-1020)
+                       b *= 0.25;
+               scale = 2;
        } else {
-               scale = 0;
+               scale = 1;
        }
 
+       /* Scale to reduce inaccuracies when both components are denormal. */
+       if (fabs(a) < 0x1p-1022 && fabs(b) < 0x1p-1022) {
+               a *= 0x1p54;
+               b *= 0x1p54;
+               scale = 0x1p-27;
+       }
+
        /* Algorithm 312, CACM vol 10, Oct 1967. */
        if (a >= 0) {
                t = sqrt((a + hypot(a, b)) * 0.5);
-               result = CMPLX(t, b / (2 * t));
+               rx = t;
+               ry = b / (2 * t);
        } else {
                t = sqrt((-a + hypot(a, b)) * 0.5);
-               result = CMPLX(fabs(b) / (2 * t), copysign(t, b));
+               rx = fabs(b) / (2 * t);
+               ry = copysign(t, b);
        }
 
-       /* Rescale. */
-       if (scale)
-               return (result * 2);
-       else
-               return (result);
+       return (CMPLX(rx * scale, ry * scale));
 }
 
 #if LDBL_MANT_DIG == 53

Modified: head/lib/msun/src/s_csqrtl.c
==============================================================================
--- head/lib/msun/src/s_csqrtl.c        Tue Jul 17 10:27:46 2018        
(r336399)
+++ head/lib/msun/src/s_csqrtl.c        Tue Jul 17 10:44:16 2018        
(r336400)
@@ -59,9 +59,7 @@ long double complex
 csqrtl(long double complex z)
 {
        long double complex result;
-       long double a, b;
-       long double t;
-       int scale;
+       long double a, b, rx, ry, scale, t;
 
        a = creall(z);
        b = cimagl(z);
@@ -94,25 +92,37 @@ csqrtl(long double complex z)
 
        /* Scale to avoid overflow. */
        if (fabsl(a) >= THRESH || fabsl(b) >= THRESH) {
-               a *= 0.25;
-               b *= 0.25;
-               scale = 1;
+               /*
+                * Don't scale a or b if this might give (spurious)
+                * underflow.  Then the unscaled value is an equivalent
+                * infinitesmal (or 0).
+                */
+               if (fabsl(a) >= 0x1p-16380L)
+                       a *= 0.25;
+               if (fabsl(b) >= 0x1p-16380L)
+                       b *= 0.25;
+               scale = 2;
        } else {
-               scale = 0;
+               scale = 1;
        }
 
+       /* Scale to reduce inaccuracies when both components are denormal. */
+       if (fabsl(a) < 0x1p-16382L && fabsl(b) < 0x1p-16382L) {
+               a *= 0x1p64;
+               b *= 0x1p64;
+               scale = 0x1p-32;
+       }
+
        /* Algorithm 312, CACM vol 10, Oct 1967. */
        if (a >= 0) {
                t = sqrtl((a + hypotl(a, b)) * 0.5);
-               result = CMPLXL(t, b / (2 * t));
+               rx = t;
+               ry = b / (2 * t);
        } else {
                t = sqrtl((-a + hypotl(a, b)) * 0.5);
-               result = CMPLXL(fabsl(b) / (2 * t), copysignl(t, b));
+               rx = fabsl(b) / (2 * t);
+               ry = copysignl(t, b);
        }
 
-       /* Rescale. */
-       if (scale)
-               return (result * 2);
-       else
-               return (result);
+       return (CMPLXL(rx * scale, ry * scale));
 }
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to