ni...@lysator.liu.se (Niels Möller) writes: > If we can get away with that, it gets a lot > simpler, > > if (r >= q_0) /* Needs to be branch free */ > { > q--; > r += d_1; > } > return q + (r >= d_1);
I've given it a try. Below patch almost works (as in "passes most but not all tests")... I don't yet know if the failure is due to lack of accuracy, or some less subtle type of bug. Regards, /Niels *** /tmp/extdiff.ynEWOO/gmp.27296256af57/mpn/generic/sbpi1_div_qr.c 2018-04-29 08:10:46.612753563 +0200 --- /home/nisse/hack/gmp/mpn/generic/sbpi1_div_qr.c 2018-04-29 08:10:15.232010203 +0200 *************** *** 40,43 **** --- 40,68 ---- #include "longlong.h" + static inline mp_limb_t + divappr_2 (mp_limb_t n2, mp_limb_t n1, + mp_limb_t d1, mp_limb_t d0, mp_limb_t dinv) + { + mp_limb_t q1, q0, r1, t1, t0, mask; + + umul_ppmm (q1, q0, n2, dinv); + add_ssaaaa (q1, q0, q1, q0, n2, n1); + + umul_ppmm (t1, t0, q1, d0); + q1++; /* May overflow */ + r1 = n1 - d1 * q1 - t1; + + /* FIXME: See if we can get sufficient accuracy without this? */ + t0 += d0; + r1 -= (t0 < d0); + + mask = - (r1 >= q0); + q1 += mask; + r1 += mask & d1; + q1 += (r1 >= d1); + + return q1; + } + mp_limb_t mpn_sbpi1_div_qr (mp_ptr qp, *************** *** 48,52 **** mp_limb_t qh; mp_size_t i; ! mp_limb_t n1, n0; mp_limb_t d1, d0; mp_limb_t cy, cy1; --- 73,77 ---- mp_limb_t qh; mp_size_t i; ! mp_limb_t n1; mp_limb_t d1, d0; mp_limb_t cy, cy1; *************** *** 72,79 **** np -= 2; - n1 = np[1]; - for (i = nn - (dn + 2); i > 0; i--) { np--; if (UNLIKELY (n1 == d1) && np[1] == d0) --- 97,104 ---- np -= 2; for (i = nn - (dn + 2); i > 0; i--) { + n1 = np[1]; + np--; if (UNLIKELY (n1 == d1) && np[1] == d0) *************** *** 85,101 **** else { ! udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); ! ! cy = mpn_submul_1 (np - dn, dp, dn, q); ! ! cy1 = n0 < cy; ! n0 = (n0 - cy) & GMP_NUMB_MASK; ! cy = n1 < cy1; ! n1 = (n1 - cy1) & GMP_NUMB_MASK; ! np[0] = n0; ! if (UNLIKELY (cy != 0)) { ! n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1); q--; } --- 110,123 ---- else { ! q = divappr_2 (n1, np[1], d1, d0, dinv); ! cy = mpn_submul_1 (np - dn, dp, dn + 2, q); ! /* Fails for ! GMP_CHECK_RANDOMIZE=264733444330758 ./tests/mpn/t-div ! */ ! ASSERT_ALWAYS (cy >= n1); ! if (UNLIKELY (cy != n1)) { ! ASSERT_CARRY(mpn_add_n (np - dn, np - dn, dp, dn + 2)); q--; } *************** *** 104,108 **** *--qp = q; } - np[1] = n1; return qh; --- 126,129 ---- -- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel