On Wed, 1 Mar 2023 05:28:34 GMT, Joe Darcy <da...@openjdk.org> wrote:
> Last and certainly not least in the port of FDLIBM to Java, the > transcendental methods for sin, cos, and tan. > > Some more tests are to be written in the StrictMath directory to verify that > the StrictMath algorihtm for sin/cos/tan is being used rather than a > different one. However, I wanted to get the rest of the change out for review > first. > > The sin/cos/tan methods are grouped together since they share the same > argument reduction logic. Argument reduction is the process of mapping an > argument of a function to an argument in a restricted range (and possibly > returning some function of the reduced argument). For sin, cos, and tan, > since they are fundamentally periodic with respect to a multiple of pi, > argument reduction is done to find the remainder of the original argument > with respect to pi/2. And the shared remainder-pi-over-2 code: $ diff -w RemPio2.c RemPio2.translit.java 1c1 < /* __ieee754_rem_pio2(x,y) --- > /** __ieee754_rem_pio2(x,y) 6,8c6 < < #include "fdlibm.h" < --- > static class RemPio2 { 12,16c10 < #ifdef __STDC__ < static const int two_over_pi[] = { < #else < static int two_over_pi[] = { < #endif --- > private static final int[] two_over_pi = { 30,34c24 < #ifdef __STDC__ < static const int npio2_hw[] = { < #else < static int npio2_hw[] = { < #endif --- > private static final int[] npio2_hw = { 53,57c43 < #ifdef __STDC__ < static const double < #else < static double < #endif --- > private static final double 69,77c55,57 < #ifdef __STDC__ < int __ieee754_rem_pio2(double x, double *y) < #else < int __ieee754_rem_pio2(x,y) < double x,y[]; < #endif < { < double z,w,t,r,fn; < double tx[3]; --- > static int __ieee754_rem_pio2(double x, double[] y) { > double z = 0.0,w,t,r,fn; > double[] tx = new double[3]; 110c90 < t = fabs(x); --- > t = Math.abs(x); 148c128,129 < __LO(z) = __LO(x); --- > // __LO(z) = __LO(x); > z = __LO(z, __LO(x)); 150c131,132 < __HI(z) = ix - (e0<<20); --- > // __HI(z) = ix - (e0<<20); > z = __HI(z, ix - (e0<<20)); 158c140 < n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); --- > n = KernelRemPio2.__kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); 162c144,147 < /* --- > > } > > /** 268,269c253 < < --- > static class KernelRemPio2 { 278c262 < #include "fdlibm.h" --- > private static final int[] init_jk = {2,3,4,6}; /* initial value for > jk */ 280,290c264 < #ifdef __STDC__ < static const int init_jk[] = {2,3,4,6}; /* initial value for jk */ < #else < static int init_jk[] = {2,3,4,6}; < #endif < < #ifdef __STDC__ < static const double PIo2[] = { < #else < static double PIo2[] = { < #endif --- > private static final double[] PIo2 = { 300,305c274 < < #ifdef __STDC__ < static const double < #else < static double < #endif --- > static final double 311,319c280,286 < #ifdef __STDC__ < int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) < #else < int __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) < double x[], y[]; int e0,nx,prec; int ipio2[]; < #endif < { < int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; < double z,fw,f[20],fq[20],q[20]; --- > static int __kernel_rem_pio2(double[] x, double[] y, int e0, int nx, > int prec, final int[] ipio2) { > int jz,jx,jv,jp,jk,carry,n,i,j,k,m,q0,ih; > int[] iq = new int[20]; > double z,fw; > double [] f = new double[20]; > double [] fq= new double[20]; > double [] q = new double[20]; 341c308 < recompute: --- > while(true) { 350,351c317,318 < z = scalbn(z,q0); /* actual value of z */ < z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ --- > z = Math.scalb(z,q0); /* actual value of z */ > z -= 8.0*Math.floor(z*0.125); /* trim off integer > >= 8 */ 383c350 < if(carry!=0) z -= scalbn(one,q0); --- > if(carry!=0) z -= Math.scalb(one,q0); 400,401c367,369 < goto recompute; < } --- > continue; > } else { break;} > } else {break;} 409c377 < z = scalbn(z,-q0); --- > z = Math.scalb(z,-q0); 419c387 < fw = scalbn(one,q0); --- > fw = Math.scalb(one,q0); 465a434 > } Yes, dear reader, the original C code contained a goto to implement a looping contract. To maintain close code correspondence, I wrapped with code in question with a "while(true){...}" and then "break"-ed or "continue"-d to iterate or exit the loop. $ diff -w RemPio2.translit.java RemPio2.fdlibm.java 44,53c44,50 < zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ < half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ < two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ < invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ < pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ < pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ < pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ < pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ < pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ < pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ --- > invpio2 = 0x1.45f306dc9c883p-1, // 6.36619772367581382433e-01 > pio2_1 = 0x1.921fb544p0, // 1.57079632673412561417e+00 > pio2_1t = 0x1.0b4611a626331p-34, // 6.07710050650619224932e-11 > pio2_2 = 0x1.0b4611a6p-34, // 6.07710050630396597660e-11 > pio2_2t = 0x1.3198a2e037073p-69, // 2.02226624879595063154e-21 > pio2_3 = 0x1.3198a2ep-69, // 2.02226624871116645580e-21 > pio2_3t = 0x1.b839a252049c1p-104; // 8.47842766036889956997e-32 60,64c57,64 < hx = __HI(x); /* high word of x */ < ix = hx&0x7fffffff; < if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */ < {y[0] = x; y[1] = 0; return 0;} < if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ --- > hx = __HI(x); // high word of x > ix = hx & 0x7fff_ffff; > if (ix <= 0x3fe9_21fb) { // |x| ~<= pi/4 , no need for reduction > y[0] = x; > y[1] = 0; > return 0; > } > if (ix < 0x4002_d97c) { // |x| < 3pi/4, special case with n=+-1 67c67 < if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ --- > if (ix != 0x3ff9_21fb) { // 33+53 bit pi is good enough 70c70 < } else { /* near pi/2, use 33+33+53 bit pi */ --- > } else { // near pi/2, use 33+33+53 bit pi 76c76 < } else { /* negative x */ --- > } else { // negative x 78c78 < if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ --- > if (ix != 0x3ff_921fb) { // 33+53 bit pi is good enough 81c81 < } else { /* near pi/2, use 33+33+53 bit pi */ --- > } else { // near pi/2, use 33+33+53 bit pi 89c89 < if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ --- > if (ix <= 0x4139_21fb) { // |x| ~<= 2^19*(pi/2), medium size 91c91 < n = (int) (t*invpio2+half); --- > n = (int) (t*invpio2 + 0.5); 94c94 < w = fn*pio2_1t; /* 1st round good to 85 bit */ --- > w = fn*pio2_1t; // 1st round good to 85 bit 96c96 < y[0] = r-w; /* quick check no cancellation */ --- > y[0] = r - w; // quick check no cancellation 101c101 < if(i>16) { /* 2nd iteration needed, good to 118 */ --- > if (i > 16) { // 2nd iteration needed, good to 118 108,109c108,109 < if(i>49) { /* 3rd iteration need, 151 bits acc */ < t = r; /* will cover all possible cases */ --- > if (i > 49) { // 3rd iteration need, 151 bits acc > t = r; // will cover all possible cases 118,119c118,124 < if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} < else return n; --- > if (hx < 0) { > y[0] = -y[0]; > y[1] = -y[1]; > return -n; > } else { > return n; > } 124,125c129,131 < if(ix>=0x7ff00000) { /* x is inf or NaN */ < y[0]=y[1]=x-x; return 0; --- > if (ix >= 0x7ff0_0000) { // x is inf or NaN > y[0] = y[1] = x - x; > return 0; 127,128c133 < /* set z = scalbn(|x|,ilogb(x)-23) */ < // __LO(z) = __LO(x); --- > // set z = scalbn(|x|,ilogb(x)-23) 131d135 < // __HI(z) = ix - (e0<<20); 135c139 < z = (z-tx[i])*two24; --- > z = (z - tx[i])*TWO24; 139c143,145 < while(tx[nx-1]==zero) nx--; /* skip zero term */ --- > while (tx[nx - 1] == 0.0) { // skip zero term > nx--; > } 141c147,151 < if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} --- > if (hx < 0) { > y[0] = -y[0]; > y[1] = -y[1]; > return -n; > } 144d153 < 262c271 < private static final int[] init_jk = {2,3,4,6}; /* initial value for jk */ --- > private static final int init_jk[] = {2, 3, 4, 6}; // initial value > for jk 264,272c273,281 < private static final double[] PIo2 = { < 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ < 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ < 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ < 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ < 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ < 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ < 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ < 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ --- > private static final double PIo2[] = { > 0x1.921fb4p0, // 1.57079625129699707031e+00 > 0x1.4442dp-24, // 7.54978941586159635335e-08 > 0x1.846988p-48, // 5.39030252995776476554e-15 > 0x1.8cc516p-72, // 3.28200341580791294123e-22 > 0x1.01b838p-96, // 1.27065575308067607349e-29 > 0x1.a25204p-120, // 1.22933308981111328932e-36 > 0x1.382228p-145, // 2.73370053816464559624e-44 > 0x1.9f31dp-169, // 2.16741683877804819444e-51 273a283 > 275,278c285 < zero = 0.0, < one = 1.0, < two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ < twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ --- > twon24 = 0x1.0p-24; // 5.96046447753906250000e-08 288c295 < /* initialize jk*/ --- > // initialize jk 292c299 < /* determine jx,jv,q0, note that 3>q0 */ --- > // determine jx, jv, q0, note that 3 > q0 294c301,304 < jv = (e0-3)/24; if(jv<0) jv=0; --- > jv = (e0 - 3)/24; > if (jv < 0) { > jv = 0; > } 297,299c307,312 < /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ < j = jv-jx; m = jx+jk; < for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j]; --- > // set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] > j = jv - jx; > m = jx + jk; > for (i = 0; i <= m; i++, j++) { > f[i] = (j < 0) ? 0.0 : (double) ipio2[j]; > } 301c314 < /* compute q[0],q[1],...q[jk] */ --- > // compute q[0],q[1],...q[jk] 303c316,318 < for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; --- > for(j = 0, fw = 0.0; j <= jx; j++) { > fw += x[j]*f[jx + i - j]; > } 309c324 < /* distill q[] into iq[] reversingly */ --- > // distill q[] into iq[] reversingly 312c327 < iq[i] = (int)(z-two24*fw); --- > iq[i] = (int)(z - TWO24*fw); 316,318c331,333 < /* compute n */ < z = Math.scalb(z,q0); /* actual value of z */ < z -= 8.0*Math.floor(z*0.125); /* trim off integer >= 8 */ --- > // compute n > z = Math.scalb(z, q0); // actual value of z > z -= 8.0*Math.floor(z*0.125); // trim off integer > >= 8 322,323c337,339 < if(q0>0) { /* need iq[jz-1] to determine n */ < i = (iq[jz-1]>>(24-q0)); n += i; --- > if (q0 > 0) { // need iq[jz - 1] to determine n > i = (iq[jz - 1] >> (24 - q0)); > n += i; 325a342,345 > } else if (q0 == 0) { > ih = iq[jz-1]>>23; > } else if (z >= 0.5) { > ih=2; 327,328d346 < else if(q0==0) ih = iq[jz-1]>>23; < else if(z>=0.5) ih=2; 330,332c348,351 < if(ih>0) { /* q > 0.5 */ < n += 1; carry = 0; < for(i=0;i<jz ;i++) { /* compute 1-q */ --- > if ( ih > 0) { // q > 0.5 > n += 1; > carry = 0; > for (i=0; i < jz; i++) { // compute 1-q 336c355,356 < carry = 1; iq[i] = 0x1000000- j; --- > carry = 1; > iq[i] = 0x100_0000 - j; 338c358,359 < } else iq[i] = 0xffffff - j; --- > } else { > iq[i] = 0xff_ffff - j; 340c361,362 < if(q0>0) { /* rare case: chance is 1 in 12 */ --- > } > if (q0 > 0) { // rare case: chance is 1 in 12 343c365,366 < iq[jz-1] &= 0x7fffff; break; --- > iq[jz-1] &= 0x7f_ffff; > break; 345c368,369 < iq[jz-1] &= 0x3fffff; break; --- > iq[jz-1] &= 0x3f_ffff; > break; 349,350c373,376 < z = one - z; < if(carry!=0) z -= Math.scalb(one,q0); --- > z = 1.0 - z; > if (carry != 0) { > z -= Math.scalb(1.0, q0); > } 354,355c380,381 < /* check if recomputation is needed */ < if(z==zero) { --- > // check if recomputation is needed > if (z == 0.0) { 357,359c383,387 < for (i=jz-1;i>=jk;i--) j |= iq[i]; < if(j==0) { /* need recomputation */ < for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ --- > for (i = jz - 1; i >= jk; i--) { > j |= iq[i]; > } > if (j == 0) { // need recomputation > for(k=1; iq[jk - k] == 0; k++); // k = no. of terms > needed 361c389 < for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ --- > for(i = jz + 1; i <= jz + k; i++) { // add q[jz+1] > to q[jz+k] 363c391,393 < for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; --- > for (j=0, fw = 0.0; j <= jx; j++) { > fw += x[j]*f[jx + i - j]; > } 368,369c398,403 < } else { break;} < } else {break;} --- > } else { > break; > } > } else { > break; > } 372c406 < /* chop off zero terms */ --- > // chop off zero terms 374,376c408,414 < jz -= 1; q0 -= 24; < while(iq[jz]==0) { jz--; q0-=24;} < } else { /* break z into 24-bit if necessary */ --- > jz -= 1; > q0 -= 24; > while (iq[jz] == 0) { > jz--; > q0-=24; > } > } else { // break z into 24-bit if necessary 378c416 < if(z>=two24) { --- > if (z >= TWO24) { 380,381c418,420 < iq[jz] = (int)(z-two24*fw); < jz += 1; q0 += 24; --- > iq[jz] = (int)(z - TWO24*fw); > jz += 1; > q0 += 24; 383c422,424 < } else iq[jz] = (int) z ; --- > } else { > iq[jz] = (int) z; > } 386,387c427,428 < /* convert integer "bit" chunk to floating-point value */ < fw = Math.scalb(one,q0); --- > // convert integer "bit" chunk to floating-point value > fw = Math.scalb(1.0, q0); 389c430,431 < q[i] = fw*(double)iq[i]; fw*=twon24; --- > q[i] = fw*(double)iq[i]; > fw *= twon24; 392c434 < /* compute PIo2[0,...,jp]*q[jz,...,0] */ --- > // compute PIo2[0,...,jp]*q[jz,...,0] 394c436,438 < for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; --- > for (fw = 0.0, k = 0; k <= jp && k <= jz-i; k++) { > fw += PIo2[k]*q[i + k]; > } 398c442 < /* compress fq[] into y[] */ --- > // compress fq[] into y[] 402c446,448 < for (i=jz;i>=0;i--) fw += fq[i]; --- > for (i = jz; i >=0; i--) { > fw += fq[i]; > } 408c454,456 < for (i=jz;i>=0;i--) fw += fq[i]; --- > for (i = jz; i>=0; i--) { > fw += fq[i]; > } 411c459,461 < for (i=1;i<=jz;i++) fw += fq[i]; --- > for (i = 1; i <= jz; i++) { > fw += fq[i]; > } 414c464 < case 3: /* painful */ --- > case 3: // painful 425c475,477 < for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; --- > for (fw = 0.0, i = jz; i >= 2; i--) { > fw += fq[i]; > } 427c479,481 < y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; --- > y[0] = fq[0]; > y[1] = fq[1]; > y[2] = fw; 429c483,485 < y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; --- > y[0] = -fq[0]; > y[1] = -fq[1]; > y[2] = -fw; ------------- PR: https://git.openjdk.org/jdk/pull/12800