On Thu, 23 Feb 2023 23:28:11 GMT, Joe Darcy <da...@openjdk.org> wrote:

> The wheel of FDLIBM porting turns a notch and sqrt comes into play.
> 
> While the sqrt operation usually has a hardware implementation that is 
> intrinsified, for completeness a software implementation should be available 
> as well.

In local testing, both the transliteration port and the Fdlibm.java port are 
consistent with hardware sqrt when run against all float values.

Comparing the various ports, original C sources vs transliteration port:


$ diff -w Sqrt.c  Sqrt.translit.java 
1c1
< /* __ieee754_sqrt(x)
---
>     /**
69a70,71
>     static class Sqrt {
>         private static final double    one     = 1.0, tiny=1.0e-300;
71,86c73,74
< #include "fdlibm.h"
< 
< #ifdef __STDC__
< static  const double    one     = 1.0, tiny=1.0e-300;
< #else
< static  double  one     = 1.0, tiny=1.0e-300;
< #endif
< 
< #ifdef __STDC__
<         double __ieee754_sqrt(double x)
< #else
<         double __ieee754_sqrt(x)
<         double x;
< #endif
< {
<         double z;
---
>         public static double compute(double x) {
>             double z = 0.0;
88c76
<         unsigned r,t1,s1,ix1,q1;
---
>             /*unsigned*/ int r,t1,s1,ix1,q1;
110c98
<                 ix0 |= (ix1>>11); ix1 <<= 21;
---
>                     ix0 |= (ix1>>>11); ix1 <<= 21; // unsigned shift
114c102
<             ix0 |= (ix1>>(32-i));
---
>                 ix0 |= (ix1>>>(32-i)); // unsigned shift
119,120c107,108
<         if(m&1){        /* odd m, double x to make it even */
<             ix0 += ix0 + ((ix1&sign)>>31);
---
>             if((m&1) != 0){        /* odd m, double x to make it even */
>                 ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift
126c114
<         ix0 += ix0 + ((ix1&sign)>>31);
---
>             ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift
138c126
<             ix0 += ix0 + ((ix1&sign)>>31);
---
>                 ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift
140c128
<             r>>=1;
---
>                 r>>>=1; // unsigned shift
147c135
<             if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
---
>                 if((t<ix0)||((t==ix0)&&(Integer.compareUnsigned(t1, ix1) <= 0 
> ))) { // t1<=ix1
151c139
<                 if (ix1 < t1) ix0 -= 1;
---
>                     if (Integer.compareUnsigned(ix1, t1) < 0) ix0 -= 1; // 
> ix1 < t1
155c143
<             ix0 += ix0 + ((ix1&sign)>>31);
---
>                 ix0 += ix0 + ((ix1&sign)>>>31);
157c145
<             r>>=1;
---
>                 r>>>=1; // unsigned shift
165c153
<                 if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
---
>                     if (q1==0xffffffff) { q1=0; q += 1;}
167c155
<                     if (q1==(unsigned)0xfffffffe) q+=1;
---
>                         if (q1==0xfffffffe) q+=1;
174c162
<         ix1 =  q1>>1;
---
>             ix1 =  q1>>>1; // unsigned shift
177,178c165,168
<         __HI(z) = ix0;
<         __LO(z) = ix1;
---
>             // __HI(z) = ix0;
>             z = __HI(z, ix0);
>             // __LO(z) = ix1;
>             z = __LO(z, ix1);
180a171
>     }



The the variables declared unsigned, checked to use unsigned right shifts (>>>) 
and to use the unsigned comparison methods.


$ diff -w  Sqrt.translit.java Sqrt.fdlibm.java 
71c71
<         private static final double    one     = 1.0, tiny=1.0e-300;
---
>         private Sqrt() {throw new UnsupportedOperationException();}
73c73,75
<         public static double compute(double x) {
---
>         private static final double tiny = 1.0e-300;
> 
>         static double compute(double x) {
75c77
<             int     sign = (int)0x80000000;
---
>             int sign = 0x8000_0000;
79,80c81,82
<             ix0 = __HI(x);                  /* high word of x */
<             ix1 = __LO(x);          /* low word of x */
---
>             ix0 = __HI(x);  // high word of x
>             ix1 = __LO(x);  // low word of x
82,85c84,86
<             /* take care of Inf and NaN */
<             if((ix0&0x7ff00000)==0x7ff00000) {
<                 return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
<                                                sqrt(-inf)=sNaN */
---
>             // take care of Inf and NaN
>             if((ix0 & 0x7ff0_0000) == 0x7ff0_0000) {
>                 return x*x + x; // sqrt(NaN)=NaN, sqrt(+inf)=+inf, 
> sqrt(-inf)=sNaN
87c88
<             /* take care of zero */
---
>             // take care of zero
89c90,91
<                 if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
---
>                 if (((ix0 & (~sign)) | ix1) == 0)
>                     return x; // sqrt(+-0) = +-0
91c93
<                     return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */
---
>                     return (x-x)/(x-x); // sqrt(-ve) = sNaN
93c95
<             /* normalize x */
---
>             // normalize x
95c97
<             if(m==0) {                              /* subnormal x */
---
>             if (m == 0) { // subnormal x
98c100,104
<                     ix0 |= (ix1>>>11); ix1 <<= 21; // unsigned shift
---
>                     ix0 |= (ix1 >>> 11); // unsigned shift
>                     ix1 <<= 21;
>                 }
>                 for(i = 0; (ix0 & 0x0010_0000) == 0; i++) {
>                     ix0 <<= 1;
100d105
<                 for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
105,107c110,112
<             m -= 1023;      /* unbias exponent */
<             ix0 = (ix0&0x000fffff)|0x00100000;
<             if((m&1) != 0){        /* odd m, double x to make it even */
---
>             m -= 1023;      // unbias exponent */
>             ix0 = (ix0 & 0x000f_ffff) | 0x0010_0000;
>             if ((m & 1) != 0){        // odd m, double x to make it even
111c116
<             m >>= 1;        /* m = [m/2] */
---
>             m >>= 1;        // m = [m/2]
113c118
<             /* generate sqrt(x) bit by bit */
---
>             // generate sqrt(x) bit by bit
116,117c121,122
<             q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */
<             r = 0x00200000;         /* r = moving bit from right to left */
---
>             q = q1 = s0 = s1 = 0;   // [q,q1] = sqrt(x)
>             r = 0x0020_0000;        // r = moving bit from right to left
135c140,141
<                 if((t<ix0)||((t==ix0)&&(Integer.compareUnsigned(t1, ix1) <= 0 
))) { // t1<=ix1
---
>                 if((t < ix0) ||
>                    ((t == ix0) && (Integer.compareUnsigned(t1, ix1) <= 0 ))) 
> { // t1 <= ix1
137c143,145
<                     if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
---
>                     if (((t1 & sign) == sign) && (s1 & sign) == 0) {
>                         s0 += 1;
>                     }
139c147,149
<                     if (Integer.compareUnsigned(ix1, t1) < 0) ix0 -= 1; // 
ix1 < t1
---
>                     if (Integer.compareUnsigned(ix1, t1) < 0) {  // ix1 < t1
>                         ix0 -= 1;
>                     }
148c158
<             /* use floating add to find out rounding direction */
---
>             // use floating add to find out rounding direction
150,155c160,169
<                 z = one-tiny; /* trigger inexact flag */
<                 if (z>=one) {
<                     z = one+tiny;
<                     if (q1==0xffffffff) { q1=0; q += 1;}
<                     else if (z>one) {
<                         if (q1==0xfffffffe) q+=1;
---
>                 z = 1.0 - tiny; // trigger inexact flag
>                 if (z >= 1.0) {
>                     z = 1.0 + tiny;
>                     if (q1 == 0xffff_ffff) {
>                         q1 = 0;
>                         q += 1;
>                     } else if (z > 1.0) {
>                         if (q1 == 0xffff_fffe) {
>                             q += 1;
>                         }
157c171
<                     } else
---
>                     } else {
161c175,176
<             ix0 = (q>>1)+0x3fe00000;
---
>             }
>             ix0 = (q >> 1) + 0x3fe0_0000;
163c178,180
<             if ((q&1)==1) ix1 |= sign;
---
>             if ((q & 1) == 1) {
>                 ix1 |= sign;
>             }
165d181
<             // __HI(z) = ix0;
167d182
<             // __LO(z) = ix1;

-------------

PR: https://git.openjdk.org/jdk/pull/12736

Reply via email to