On Thu, 23 Feb 2023 23:28:11 GMT, Joe Darcy <[email protected]> wrote:
> The wheel of FDLIBM porting turns a notch and sqrt comes into play.
>
> While the sqrt operation usually has a hardware implementation that is
> intrinsified, for completeness a software implementation should be available
> as well.
In local testing, both the transliteration port and the Fdlibm.java port are
consistent with hardware sqrt when run against all float values.
Comparing the various ports, original C sources vs transliteration port:
$ diff -w Sqrt.c Sqrt.translit.java
1c1
< /* __ieee754_sqrt(x)
---
> /**
69a70,71
> static class Sqrt {
> private static final double one = 1.0, tiny=1.0e-300;
71,86c73,74
< #include "fdlibm.h"
<
< #ifdef __STDC__
< static const double one = 1.0, tiny=1.0e-300;
< #else
< static double one = 1.0, tiny=1.0e-300;
< #endif
<
< #ifdef __STDC__
< double __ieee754_sqrt(double x)
< #else
< double __ieee754_sqrt(x)
< double x;
< #endif
< {
< double z;
---
> public static double compute(double x) {
> double z = 0.0;
88c76
< unsigned r,t1,s1,ix1,q1;
---
> /*unsigned*/ int r,t1,s1,ix1,q1;
110c98
< ix0 |= (ix1>>11); ix1 <<= 21;
---
> ix0 |= (ix1>>>11); ix1 <<= 21; // unsigned shift
114c102
< ix0 |= (ix1>>(32-i));
---
> ix0 |= (ix1>>>(32-i)); // unsigned shift
119,120c107,108
< if(m&1){ /* odd m, double x to make it even */
< ix0 += ix0 + ((ix1&sign)>>31);
---
> if((m&1) != 0){ /* odd m, double x to make it even */
> ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift
126c114
< ix0 += ix0 + ((ix1&sign)>>31);
---
> ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift
138c126
< ix0 += ix0 + ((ix1&sign)>>31);
---
> ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift
140c128
< r>>=1;
---
> r>>>=1; // unsigned shift
147c135
< if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
---
> if((t<ix0)||((t==ix0)&&(Integer.compareUnsigned(t1, ix1) <= 0
> ))) { // t1<=ix1
151c139
< if (ix1 < t1) ix0 -= 1;
---
> if (Integer.compareUnsigned(ix1, t1) < 0) ix0 -= 1; //
> ix1 < t1
155c143
< ix0 += ix0 + ((ix1&sign)>>31);
---
> ix0 += ix0 + ((ix1&sign)>>>31);
157c145
< r>>=1;
---
> r>>>=1; // unsigned shift
165c153
< if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
---
> if (q1==0xffffffff) { q1=0; q += 1;}
167c155
< if (q1==(unsigned)0xfffffffe) q+=1;
---
> if (q1==0xfffffffe) q+=1;
174c162
< ix1 = q1>>1;
---
> ix1 = q1>>>1; // unsigned shift
177,178c165,168
< __HI(z) = ix0;
< __LO(z) = ix1;
---
> // __HI(z) = ix0;
> z = __HI(z, ix0);
> // __LO(z) = ix1;
> z = __LO(z, ix1);
180a171
> }
The the variables declared unsigned, checked to use unsigned right shifts (>>>)
and to use the unsigned comparison methods.
$ diff -w Sqrt.translit.java Sqrt.fdlibm.java
71c71
< private static final double one = 1.0, tiny=1.0e-300;
---
> private Sqrt() {throw new UnsupportedOperationException();}
73c73,75
< public static double compute(double x) {
---
> private static final double tiny = 1.0e-300;
>
> static double compute(double x) {
75c77
< int sign = (int)0x80000000;
---
> int sign = 0x8000_0000;
79,80c81,82
< ix0 = __HI(x); /* high word of x */
< ix1 = __LO(x); /* low word of x */
---
> ix0 = __HI(x); // high word of x
> ix1 = __LO(x); // low word of x
82,85c84,86
< /* take care of Inf and NaN */
< if((ix0&0x7ff00000)==0x7ff00000) {
< return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
< sqrt(-inf)=sNaN */
---
> // take care of Inf and NaN
> if((ix0 & 0x7ff0_0000) == 0x7ff0_0000) {
> return x*x + x; // sqrt(NaN)=NaN, sqrt(+inf)=+inf,
> sqrt(-inf)=sNaN
87c88
< /* take care of zero */
---
> // take care of zero
89c90,91
< if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
---
> if (((ix0 & (~sign)) | ix1) == 0)
> return x; // sqrt(+-0) = +-0
91c93
< return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
---
> return (x-x)/(x-x); // sqrt(-ve) = sNaN
93c95
< /* normalize x */
---
> // normalize x
95c97
< if(m==0) { /* subnormal x */
---
> if (m == 0) { // subnormal x
98c100,104
< ix0 |= (ix1>>>11); ix1 <<= 21; // unsigned shift
---
> ix0 |= (ix1 >>> 11); // unsigned shift
> ix1 <<= 21;
> }
> for(i = 0; (ix0 & 0x0010_0000) == 0; i++) {
> ix0 <<= 1;
100d105
< for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
105,107c110,112
< m -= 1023; /* unbias exponent */
< ix0 = (ix0&0x000fffff)|0x00100000;
< if((m&1) != 0){ /* odd m, double x to make it even */
---
> m -= 1023; // unbias exponent */
> ix0 = (ix0 & 0x000f_ffff) | 0x0010_0000;
> if ((m & 1) != 0){ // odd m, double x to make it even
111c116
< m >>= 1; /* m = [m/2] */
---
> m >>= 1; // m = [m/2]
113c118
< /* generate sqrt(x) bit by bit */
---
> // generate sqrt(x) bit by bit
116,117c121,122
< q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
< r = 0x00200000; /* r = moving bit from right to left */
---
> q = q1 = s0 = s1 = 0; // [q,q1] = sqrt(x)
> r = 0x0020_0000; // r = moving bit from right to left
135c140,141
< if((t<ix0)||((t==ix0)&&(Integer.compareUnsigned(t1, ix1) <= 0
))) { // t1<=ix1
---
> if((t < ix0) ||
> ((t == ix0) && (Integer.compareUnsigned(t1, ix1) <= 0 )))
> { // t1 <= ix1
137c143,145
< if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
---
> if (((t1 & sign) == sign) && (s1 & sign) == 0) {
> s0 += 1;
> }
139c147,149
< if (Integer.compareUnsigned(ix1, t1) < 0) ix0 -= 1; //
ix1 < t1
---
> if (Integer.compareUnsigned(ix1, t1) < 0) { // ix1 < t1
> ix0 -= 1;
> }
148c158
< /* use floating add to find out rounding direction */
---
> // use floating add to find out rounding direction
150,155c160,169
< z = one-tiny; /* trigger inexact flag */
< if (z>=one) {
< z = one+tiny;
< if (q1==0xffffffff) { q1=0; q += 1;}
< else if (z>one) {
< if (q1==0xfffffffe) q+=1;
---
> z = 1.0 - tiny; // trigger inexact flag
> if (z >= 1.0) {
> z = 1.0 + tiny;
> if (q1 == 0xffff_ffff) {
> q1 = 0;
> q += 1;
> } else if (z > 1.0) {
> if (q1 == 0xffff_fffe) {
> q += 1;
> }
157c171
< } else
---
> } else {
161c175,176
< ix0 = (q>>1)+0x3fe00000;
---
> }
> ix0 = (q >> 1) + 0x3fe0_0000;
163c178,180
< if ((q&1)==1) ix1 |= sign;
---
> if ((q & 1) == 1) {
> ix1 |= sign;
> }
165d181
< // __HI(z) = ix0;
167d182
< // __LO(z) = ix1;
-------------
PR: https://git.openjdk.org/jdk/pull/12736