On Thu, 23 Feb 2023 23:28:11 GMT, Joe Darcy <da...@openjdk.org> wrote:
> The wheel of FDLIBM porting turns a notch and sqrt comes into play. > > While the sqrt operation usually has a hardware implementation that is > intrinsified, for completeness a software implementation should be available > as well. In local testing, both the transliteration port and the Fdlibm.java port are consistent with hardware sqrt when run against all float values. Comparing the various ports, original C sources vs transliteration port: $ diff -w Sqrt.c Sqrt.translit.java 1c1 < /* __ieee754_sqrt(x) --- > /** 69a70,71 > static class Sqrt { > private static final double one = 1.0, tiny=1.0e-300; 71,86c73,74 < #include "fdlibm.h" < < #ifdef __STDC__ < static const double one = 1.0, tiny=1.0e-300; < #else < static double one = 1.0, tiny=1.0e-300; < #endif < < #ifdef __STDC__ < double __ieee754_sqrt(double x) < #else < double __ieee754_sqrt(x) < double x; < #endif < { < double z; --- > public static double compute(double x) { > double z = 0.0; 88c76 < unsigned r,t1,s1,ix1,q1; --- > /*unsigned*/ int r,t1,s1,ix1,q1; 110c98 < ix0 |= (ix1>>11); ix1 <<= 21; --- > ix0 |= (ix1>>>11); ix1 <<= 21; // unsigned shift 114c102 < ix0 |= (ix1>>(32-i)); --- > ix0 |= (ix1>>>(32-i)); // unsigned shift 119,120c107,108 < if(m&1){ /* odd m, double x to make it even */ < ix0 += ix0 + ((ix1&sign)>>31); --- > if((m&1) != 0){ /* odd m, double x to make it even */ > ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift 126c114 < ix0 += ix0 + ((ix1&sign)>>31); --- > ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift 138c126 < ix0 += ix0 + ((ix1&sign)>>31); --- > ix0 += ix0 + ((ix1&sign)>>>31); // unsigned shift 140c128 < r>>=1; --- > r>>>=1; // unsigned shift 147c135 < if((t<ix0)||((t==ix0)&&(t1<=ix1))) { --- > if((t<ix0)||((t==ix0)&&(Integer.compareUnsigned(t1, ix1) <= 0 > ))) { // t1<=ix1 151c139 < if (ix1 < t1) ix0 -= 1; --- > if (Integer.compareUnsigned(ix1, t1) < 0) ix0 -= 1; // > ix1 < t1 155c143 < ix0 += ix0 + ((ix1&sign)>>31); --- > ix0 += ix0 + ((ix1&sign)>>>31); 157c145 < r>>=1; --- > r>>>=1; // unsigned shift 165c153 < if (q1==(unsigned)0xffffffff) { q1=0; q += 1;} --- > if (q1==0xffffffff) { q1=0; q += 1;} 167c155 < if (q1==(unsigned)0xfffffffe) q+=1; --- > if (q1==0xfffffffe) q+=1; 174c162 < ix1 = q1>>1; --- > ix1 = q1>>>1; // unsigned shift 177,178c165,168 < __HI(z) = ix0; < __LO(z) = ix1; --- > // __HI(z) = ix0; > z = __HI(z, ix0); > // __LO(z) = ix1; > z = __LO(z, ix1); 180a171 > } The the variables declared unsigned, checked to use unsigned right shifts (>>>) and to use the unsigned comparison methods. $ diff -w Sqrt.translit.java Sqrt.fdlibm.java 71c71 < private static final double one = 1.0, tiny=1.0e-300; --- > private Sqrt() {throw new UnsupportedOperationException();} 73c73,75 < public static double compute(double x) { --- > private static final double tiny = 1.0e-300; > > static double compute(double x) { 75c77 < int sign = (int)0x80000000; --- > int sign = 0x8000_0000; 79,80c81,82 < ix0 = __HI(x); /* high word of x */ < ix1 = __LO(x); /* low word of x */ --- > ix0 = __HI(x); // high word of x > ix1 = __LO(x); // low word of x 82,85c84,86 < /* take care of Inf and NaN */ < if((ix0&0x7ff00000)==0x7ff00000) { < return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf < sqrt(-inf)=sNaN */ --- > // take care of Inf and NaN > if((ix0 & 0x7ff0_0000) == 0x7ff0_0000) { > return x*x + x; // sqrt(NaN)=NaN, sqrt(+inf)=+inf, > sqrt(-inf)=sNaN 87c88 < /* take care of zero */ --- > // take care of zero 89c90,91 < if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ --- > if (((ix0 & (~sign)) | ix1) == 0) > return x; // sqrt(+-0) = +-0 91c93 < return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ --- > return (x-x)/(x-x); // sqrt(-ve) = sNaN 93c95 < /* normalize x */ --- > // normalize x 95c97 < if(m==0) { /* subnormal x */ --- > if (m == 0) { // subnormal x 98c100,104 < ix0 |= (ix1>>>11); ix1 <<= 21; // unsigned shift --- > ix0 |= (ix1 >>> 11); // unsigned shift > ix1 <<= 21; > } > for(i = 0; (ix0 & 0x0010_0000) == 0; i++) { > ix0 <<= 1; 100d105 < for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; 105,107c110,112 < m -= 1023; /* unbias exponent */ < ix0 = (ix0&0x000fffff)|0x00100000; < if((m&1) != 0){ /* odd m, double x to make it even */ --- > m -= 1023; // unbias exponent */ > ix0 = (ix0 & 0x000f_ffff) | 0x0010_0000; > if ((m & 1) != 0){ // odd m, double x to make it even 111c116 < m >>= 1; /* m = [m/2] */ --- > m >>= 1; // m = [m/2] 113c118 < /* generate sqrt(x) bit by bit */ --- > // generate sqrt(x) bit by bit 116,117c121,122 < q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ < r = 0x00200000; /* r = moving bit from right to left */ --- > q = q1 = s0 = s1 = 0; // [q,q1] = sqrt(x) > r = 0x0020_0000; // r = moving bit from right to left 135c140,141 < if((t<ix0)||((t==ix0)&&(Integer.compareUnsigned(t1, ix1) <= 0 ))) { // t1<=ix1 --- > if((t < ix0) || > ((t == ix0) && (Integer.compareUnsigned(t1, ix1) <= 0 ))) > { // t1 <= ix1 137c143,145 < if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; --- > if (((t1 & sign) == sign) && (s1 & sign) == 0) { > s0 += 1; > } 139c147,149 < if (Integer.compareUnsigned(ix1, t1) < 0) ix0 -= 1; // ix1 < t1 --- > if (Integer.compareUnsigned(ix1, t1) < 0) { // ix1 < t1 > ix0 -= 1; > } 148c158 < /* use floating add to find out rounding direction */ --- > // use floating add to find out rounding direction 150,155c160,169 < z = one-tiny; /* trigger inexact flag */ < if (z>=one) { < z = one+tiny; < if (q1==0xffffffff) { q1=0; q += 1;} < else if (z>one) { < if (q1==0xfffffffe) q+=1; --- > z = 1.0 - tiny; // trigger inexact flag > if (z >= 1.0) { > z = 1.0 + tiny; > if (q1 == 0xffff_ffff) { > q1 = 0; > q += 1; > } else if (z > 1.0) { > if (q1 == 0xffff_fffe) { > q += 1; > } 157c171 < } else --- > } else { 161c175,176 < ix0 = (q>>1)+0x3fe00000; --- > } > ix0 = (q >> 1) + 0x3fe0_0000; 163c178,180 < if ((q&1)==1) ix1 |= sign; --- > if ((q & 1) == 1) { > ix1 |= sign; > } 165d181 < // __HI(z) = ix0; 167d182 < // __LO(z) = ix1; ------------- PR: https://git.openjdk.org/jdk/pull/12736