ni...@lysator.liu.se (Niels Möller) writes: > I don't think I have a real plan. But the most practical way forward I > see is to > > 1. Adapt bdiv_mu to the new convention by writing a simple wrapper around > the current code. > > 2. Iron out remaining problems in bdiv callers as well as any > remaining bugs in dc and sb. > > 3. Possibly change the implementation of mu_bdiv.
I'm now halfway through step (2), also fixing a few bugs in the sb code.. The updated patch below seems to work fine according to (also patched) tests/mpn/t-bdiv. I'll leave a while GMP_CHECK_RANDOMIZE=0 tests/mpn/t-bdiv ; do : ; done running overnight. Some other test cases fail, though: t-root, t-perfpow, t-divis, and t-cong. I'm not familiar with this code, I had a quick look at the root code but didn't see any obvious dependency on bdiv. Regards, /Niels diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/binvert.c gmp-bdiv/mpn/generic/binvert.c *** gmp-bdiv.b5ca16212198/mpn/generic/binvert.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/binvert.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 75,78 **** --- 75,80 ---- mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di); + mpn_neg (rp, rp, rn); + /* Use Newton iterations to get the desired precision. */ for (; rn < n; rn = newrn) diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_q.c gmp-bdiv/mpn/generic/dcpi1_bdiv_q.c *** gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/dcpi1_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 36,40 **** } ! /* Computes Q = N / D mod B^n, destroys N. N = {np,n} --- 36,40 ---- } ! /* Computes Q = - N / D mod B^n, destroys N. N = {np,n} *************** *** 58,67 **** mpn_mullo_n (tp, qp, dp + hi, lo); ! mpn_sub_n (np + hi, np + hi, tp, lo); if (lo < hi) { ! cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]); ! np[n - 1] -= cy; } qp += lo; --- 58,67 ---- mpn_mullo_n (tp, qp, dp + hi, lo); ! mpn_add_n (np + hi, np + hi, tp, lo); if (lo < hi) { ! cy += mpn_addmul_1 (np + lo, qp, lo, dp[lo]); ! np[n - 1] += cy; } qp += lo; *************** *** 72,76 **** } ! /* Computes Q = N / D mod B^nn, destroys N. N = {np,nn} --- 72,76 ---- } ! /* Computes Q = - N / D mod B^nn, destroys N. N = {np,nn} *************** *** 120,124 **** mpn_incr_u (tp + qn, cy); ! mpn_sub (np + qn, np + qn, nn - qn, tp, dn); cy = 0; } --- 120,124 ---- mpn_incr_u (tp + qn, cy); ! mpn_add (np + qn, np + qn, nn - qn, tp, dn); cy = 0; } *************** *** 130,134 **** while (qn > dn) { ! mpn_sub_1 (np + dn, np + dn, qn - dn, cy); cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp); qp += dn; --- 130,134 ---- while (qn > dn) { ! mpn_add_1 (np + dn, np + dn, qn - dn, cy); cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp); qp += dn; diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_qr.c gmp-bdiv/mpn/generic/dcpi1_bdiv_qr.c *** gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/dcpi1_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 33,42 **** Output: ! q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS}, ! r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS} Stores q at qp. Stores the n least significant limbs of r at the high half ! of np, and returns the borrow from the subtraction n - q*d. d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */ --- 33,42 ---- Output: ! q = -n * d^{-1} mod 2^{qn * GMP_NUMB_BITS}, ! r = (n + q * d) * 2^{-qn * GMP_NUMB_BITS} Stores q at qp. Stores the n least significant limbs of r at the high half ! of np, and returns the carry from the addition n + q*d. d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */ *************** *** 67,71 **** mpn_incr_u (tp + lo, cy); ! rh = mpn_sub (np + lo, np + lo, n + hi, tp, n); if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD)) --- 67,71 ---- mpn_incr_u (tp + lo, cy); ! rh = mpn_add (np + lo, np + lo, n + hi, tp, n); if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD)) *************** *** 77,81 **** mpn_incr_u (tp + hi, cy); ! rh += mpn_sub_n (np + n, np + n, tp, n); return rh; --- 77,81 ---- mpn_incr_u (tp + hi, cy); ! rh += mpn_add_n (np + n, np + n, tp, n); return rh; *************** *** 123,127 **** mpn_incr_u (tp + qn, cy); ! rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn); cy = 0; } --- 123,127 ---- mpn_incr_u (tp + qn, cy); ! rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn); cy = 0; } *************** *** 133,137 **** do { ! rr += mpn_sub_1 (np + dn, np + dn, qn, cy); cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp); qp += dn; --- 133,137 ---- do { ! rr += mpn_add_1 (np + dn, np + dn, qn, cy); cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp); qp += dn; *************** *** 158,162 **** mpn_incr_u (tp + qn, cy); ! rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn); cy = 0; } --- 158,162 ---- mpn_incr_u (tp + qn, cy); ! rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn); cy = 0; } diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/divexact.c gmp-bdiv/mpn/generic/divexact.c *** gmp-bdiv.b5ca16212198/mpn/generic/divexact.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/divexact.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 89,92 **** --- 89,95 ---- mpn_bdiv_q (qp, np, qn, dp, dn, tp); TMP_FREE; + + /* Since bdiv_q computes -N/D (mod B^{qn}), we must negate now. */ + mpn_neg (qp, qp, qn); } diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_q.c gmp-bdiv/mpn/generic/mu_bdiv_q.c *** gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/mu_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 56,61 **** FIXME: Trim final quotient calculation to qn limbs of precision. */ ! void ! mpn_mu_bdiv_q (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, --- 56,61 ---- FIXME: Trim final quotient calculation to qn limbs of precision. */ ! static void ! mpn_mu_bdiv_q_old (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, *************** *** 215,218 **** --- 215,228 ---- } + void + mpn_mu_bdiv_q (mp_ptr qp, + mp_srcptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, + mp_ptr scratch) + { + mpn_mu_bdiv_q_old (qp, np, nn, dp, dn, scratch); + mpn_neg (qp, qp, nn); + } + mp_size_t mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn) diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c gmp-bdiv/mpn/generic/mu_bdiv_qr.c *** gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/mu_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 55,64 **** particular, when dn==in, tp and rp could use the same space. */ ! mp_limb_t ! mpn_mu_bdiv_qr (mp_ptr qp, ! mp_ptr rp, ! mp_srcptr np, mp_size_t nn, ! mp_srcptr dp, mp_size_t dn, ! mp_ptr scratch) { mp_size_t qn; --- 55,64 ---- particular, when dn==in, tp and rp could use the same space. */ ! static mp_limb_t ! mpn_mu_bdiv_qr_old (mp_ptr qp, ! mp_ptr rp, ! mp_srcptr np, mp_size_t nn, ! mp_srcptr dp, mp_size_t dn, ! mp_ptr scratch) { mp_size_t qn; *************** *** 233,236 **** --- 233,269 ---- } + mp_limb_t + mpn_mu_bdiv_qr (mp_ptr qp, + mp_ptr rp, + mp_srcptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, + mp_ptr scratch) + { + mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch); + + /* R' B^{qn} = U - Q' D + * + * Q = B^{qn} - Q' (assuming Q' != 0) + * + * R B^{qn} = U + Q D = U + B^{qn} D - Q' D + * = B^{qn} D + R' + */ + + if (UNLIKELY (!mpn_neg (qp, qp, nn - dn))) + { + /* Zero quotient. */ + ASSERT (cy == 0); + return 0; + } + else + { + mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn); + ASSERT (cy2 >= cy); + + return cy2 - cy; + } + } + + mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn) diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/remove.c gmp-bdiv/mpn/generic/remove.c *** gmp-bdiv.b5ca16212198/mpn/generic/remove.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/remove.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 93,96 **** --- 93,98 ---- MP_PTR_SWAP (qp, qp2); qn = qn - pn; + mpn_neg (qp, qp, qn+1); + qn += qp[qn] != 0; *************** *** 132,135 **** --- 134,139 ---- MP_PTR_SWAP (qp, qp2); qn = qn - pn; + mpn_neg (qp, qp, qn+1); + qn += qp[qn] != 0; diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_q.c gmp-bdiv/mpn/generic/sbpi1_bdiv_q.c *** gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/sbpi1_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 27,89 **** #include "gmp.h" #include "gmp-impl.h" ! ! /* Computes Q = N / D mod B^nn, destroys N. D must be odd. dinv is (-D)^-1 mod B. ! ! The straightforward way to compute Q is to cancel one limb at a time, using ! ! qp[i] = D^{-1} * np[i] (mod B) ! N -= B^i * qp[i] * D ! ! But we prefer addition to subtraction, since mpn_addmul_1 is often faster ! than mpn_submul_1. Q = - N / D can be computed by iterating ! ! qp[i] = (-D)^{-1} * np[i] (mod B) ! N += B^i * qp[i] * D ! ! And then we flip the sign, -Q = (not Q) + 1. */ void mpn_sbpi1_bdiv_q (mp_ptr qp, ! mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) { mp_size_t i; ! mp_limb_t cy, q; ASSERT (dn > 0); ! ASSERT (nn >= dn); ASSERT ((dp[0] & 1) != 0); ! /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK, ! but some over N/Q overlaps will not work. */ ! for (i = nn - dn; i > 0; i--) { ! q = dinv * np[0]; ! cy = mpn_addmul_1 (np, dp, dn, q); ! mpn_add_1 (np + dn, np + dn, i, cy); ! ASSERT (np[0] == 0); ! qp[0] = ~q; ! qp++; ! np++; } - for (i = dn; i > 1; i--) { ! q = dinv * np[0]; ! mpn_addmul_1 (np, dp, i, q); ! ASSERT (np[0] == 0); ! qp[0] = ~q; ! qp++; ! np++; } /* Final limb */ ! q = dinv * np[0]; ! qp[0] = ~q; ! mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1); } --- 27,107 ---- #include "gmp.h" #include "gmp-impl.h" + #include "longlong.h" ! /* Computes Q = - U / D mod B^un, destroys U. D must be odd. dinv is (-D)^-1 mod B. ! */ void mpn_sbpi1_bdiv_q (mp_ptr qp, ! mp_ptr up, mp_size_t un, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) { mp_size_t i; ! mp_limb_t q; ASSERT (dn > 0); ! ASSERT (un >= dn); ASSERT ((dp[0] & 1) != 0); ! ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn)); ! if (dn == 1) { ! mp_limb_t dl = dp[0]; ! mp_limb_t ul = *up++; ! mp_limb_t cy = 0; ! for (i = un - 1; i > 0; i--) ! { ! mp_limb_t p1, p0; ! q = dinv * ul; ! *qp++ = q; ! umul_ppmm (p1, p0, q, dl); ! ASSERT (p0 + ul == 0); ! cy += (ul > 0); ! p1 += cy; ! cy = p1 < cy; ! ul = p1 + *up++; ! cy += ul < p1; ! } ! *qp = dinv * ul; ! return; ! } ! if (un > dn) ! { ! mp_limb_t cy, hi; ! for (i = un - dn - 1, cy = 0; i > 0; i--) ! { ! q = dinv * up[0]; ! hi = mpn_addmul_1 (up, dp, dn, q); ! ! ASSERT (up[0] == 0); ! *qp++ = q; ! hi += cy; ! cy = hi < cy; ! hi += up[dn]; ! cy += hi < up[dn]; ! up[dn] = hi; ! up++; ! } ! q = dinv * up[0]; ! hi = cy + mpn_addmul_1 (up, dp, dn, q); ! ASSERT (up[0] == 0); ! *qp++ = q; ! up[dn] += hi; ! up++; } for (i = dn; i > 1; i--) { ! mp_limb_t q = dinv * up[0]; ! mpn_addmul_1 (up, dp, i, q); ! ASSERT (up[0] == 0); ! *qp++ = q; ! up++; } /* Final limb */ ! *qp = dinv * up[0]; } diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_qr.c gmp-bdiv/mpn/generic/sbpi1_bdiv_qr.c *** gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpn/generic/sbpi1_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 29,41 **** ! /* Computes a binary quotient of size qn = nn - dn. Output: ! Q = N * D^{-1} mod B^qn, ! R = (N - Q * D) * B^(-qn) ! Stores the dn least significant limbs of R at {np + nn - dn, dn}, ! and returns the borrow from the subtraction N - Q*D. D must be odd. dinv is (-D)^-1 mod B. */ --- 29,41 ---- ! /* Computes a binary quotient of size qn = un - dn. Output: ! Q = -U * D^{-1} mod B^qn, ! R = (U + Q * D) * B^(-qn) ! Stores the dn least significant limbs of R at {up + un - dn, dn}, ! and returns the carry from the addition N + Q*D. D must be odd. dinv is (-D)^-1 mod B. */ *************** *** 43,108 **** mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr qp, ! mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) { - mp_size_t qn; mp_size_t i; ! mp_limb_t rh; ! mp_limb_t ql; ASSERT (dn > 0); ! ASSERT (nn > dn); ASSERT ((dp[0] & 1) != 0); ! /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK, ! but some over N/Q overlaps will not work. */ ! qn = nn - dn; ! ! rh = 0; ! ! /* To complete the negation, this value is added to q. */ ! ql = 1; ! while (qn > dn) { ! for (i = 0; i < dn; i++) ! { ! mp_limb_t q; ! ! q = dinv * np[i]; ! np[i] = mpn_addmul_1 (np + i, dp, dn, q); ! qp[i] = ~q; ! } ! rh += mpn_add (np + dn, np + dn, qn, np, dn); ! ql = mpn_add_1 (qp, qp, dn, ql); ! ! qp += dn; qn -= dn; ! np += dn; nn -= dn; ! } ! ! for (i = 0; i < qn; i++) ! { ! mp_limb_t q; ! ! q = dinv * np[i]; ! np[i] = mpn_addmul_1 (np + i, dp, dn, q); ! qp[i] = ~q; } ! rh += mpn_add_n (np + dn, np + dn, np, qn); ! ql = mpn_add_1 (qp, qp, qn, ql); ! ! if (UNLIKELY (ql > 0)) ! { ! /* q == 0 */ ! ASSERT (rh == 0); ! return 0; ! } ! else ! { ! mp_limb_t cy; ! ! cy = mpn_sub_n (np + qn, np + qn, dp, dn); ! ASSERT (cy >= rh); ! return cy - rh; ! } } --- 43,71 ---- mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr qp, ! mp_ptr up, mp_size_t un, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) { mp_size_t i; ! mp_limb_t cy; ASSERT (dn > 0); ! ASSERT (un > dn); ASSERT ((dp[0] & 1) != 0); ! ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn)); ! for (i = un - dn, cy = 0; i != 0; i--) { ! mp_limb_t q = dinv * up[0]; ! mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q); ! *qp++ = q; ! ! hi += cy; ! cy = hi < cy; ! hi += up[dn]; ! cy += hi < up[dn]; ! up[dn] = hi; ! up++; } ! return cy; } diff -Nrc2 gmp-bdiv.b5ca16212198/mpz/bin_uiui.c gmp-bdiv/mpz/bin_uiui.c *** gmp-bdiv.b5ca16212198/mpz/bin_uiui.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/mpz/bin_uiui.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 271,274 **** --- 271,275 ---- nn -= kn; mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv); + mpn_neg (np, np, nn); if (kmax == 0) diff -Nrc2 gmp-bdiv.b5ca16212198/tests/mpn/t-bdiv.c gmp-bdiv/tests/mpn/t-bdiv.c *** gmp-bdiv.b5ca16212198/tests/mpn/t-bdiv.c 2012-07-15 00:16:56.000000000 +0200 --- gmp-bdiv/tests/mpn/t-bdiv.c 2012-07-15 00:16:56.000000000 +0200 *************** *** 59,63 **** { mp_size_t qn; - int cmp; mp_ptr tp; mp_limb_t cy = 4711; /* silence warnings */ --- 59,62 ---- *************** *** 78,90 **** mpn_mul (tp, qp, qn, dp, dn); ! if (rp != NULL) ! { ! cy = mpn_add_n (tp + qn, tp + qn, rp, dn); ! cmp = cy != rh || mpn_cmp (tp, np, nn) != 0; ! } ! else ! cmp = mpn_cmp (tp, np, nn - dn) != 0; ! if (cmp != 0) { printf ("\r*******************************************************************************\n"); --- 77,84 ---- mpn_mul (tp, qp, qn, dp, dn); ! cy = mpn_add_n (tp, tp, np, nn); ! if (! mpn_zero_p (tp, qn) ! || (rp != NULL && (cy != rh || mpn_cmp (tp + qn, rp, dn) != 0))) { printf ("\r*******************************************************************************\n"); -- Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org http://gmplib.org/mailman/listinfo/gmp-devel