t...@gmplib.org (Torbjörn Granlund) writes: > ni...@lysator.liu.se (Niels Möller) writes: > > So I see no strong reason to do it one way or the other. Maybe start > with requiring both u and v odd, and relax requirements later in case we > find any more tangible benefit from doing that? > > Makes sense.
Here's a patch to take out gcd_11 to it's own C file, and call it from gcd_1.c. Needs documentation (if you agree it should be a public function), and it would be nice with separate tests for this function.
diff -Nprc2 gmp-gcd_11.4c6ae64fa612/configure.ac gmp-gcd_11/configure.ac *** gmp-gcd_11.4c6ae64fa612/configure.ac 2019-08-05 11:07:24.000000000 +0200 --- gmp-gcd_11/configure.ac 2019-08-05 22:10:48.042277000 +0200 *************** gmp_mpn_functions="$extra_functions *** 2971,2975 **** scan0 scan1 popcount hamdist cmp zero_p \ perfsqr perfpow strongfibo \ ! gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step \ gcdext_lehmer \ div_q tdiv_qr jacbase jacobi_2 jacobi get_d \ --- 2971,2975 ---- scan0 scan1 popcount hamdist cmp zero_p \ perfsqr perfpow strongfibo \ ! gcd_11 gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step \ gcdext_lehmer \ div_q tdiv_qr jacbase jacobi_2 jacobi get_d \ *************** AH_VERBATIM([HAVE_NATIVE], *** 3560,3563 **** --- 3560,3564 ---- #undef HAVE_NATIVE_mpn_divrem_2 #undef HAVE_NATIVE_mpn_gcd_1 + #undef HAVE_NATIVE_mpn_gcd_11 #undef HAVE_NATIVE_mpn_hamdist #undef HAVE_NATIVE_mpn_invert_limb diff -Nprc2 gmp-gcd_11.4c6ae64fa612/gmp-h.in gmp-gcd_11/gmp-h.in *** gmp-gcd_11.4c6ae64fa612/gmp-h.in 2019-08-05 11:07:24.000000000 +0200 --- gmp-gcd_11/gmp-h.in 2019-08-05 22:10:48.042277000 +0200 *************** __GMP_DECLSPEC mp_limb_t mpn_div_qr_2 (m *** 1517,1520 **** --- 1517,1523 ---- __GMP_DECLSPEC mp_size_t mpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t); + #define mpn_gcd_11 __MPN(gcd_11) + __GMP_DECLSPEC mp_limb_t mpn_gcd_11 (mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE; + #define mpn_gcd_1 __MPN(gcd_1) __GMP_DECLSPEC mp_limb_t mpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE; diff -Nprc2 gmp-gcd_11.4c6ae64fa612/mpn/generic/gcd_11.c gmp-gcd_11/mpn/generic/gcd_11.c *** gmp-gcd_11.4c6ae64fa612/mpn/generic/gcd_11.c 1970-01-01 01:00:00.000000000 +0100 --- gmp-gcd_11/mpn/generic/gcd_11.c 2019-08-05 22:10:48.042277000 +0200 *************** *** 0 **** --- 1,75 ---- + /* mpn_gcd_11 -- limb greatest common divisor. + + Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + + or both in parallel, as here. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received copies of the GNU General Public License and the + GNU Lesser General Public License along with the GNU MP Library. If not, + see https://www.gnu.org/licenses/. */ + + #include "gmp-impl.h" + #include "longlong.h" + + #if !HAVE_NATIVE_mpn_gcd_11 + mp_limb_t + mpn_gcd_11 (mp_limb_t u, mp_limb_t v) + { + ASSERT (u & v & 1); + + /* In this loop, we represent the odd numbers ulimb and vlimb + without the redundant least significant one bit. This reduction + in size by one bit ensures that the high bit of t, below, is set + if and only if vlimb > ulimb. */ + + u >>= 1; + v >>= 1; + + while (u != v) + { + mp_limb_t t; + mp_limb_t vgtu; + int c; + + t = u - v; + vgtu = LIMB_HIGHBIT_TO_MASK (t); + + /* v <-- min (u, v) */ + v += (vgtu & t); + + /* u <-- |u - v| */ + u = (t ^ vgtu) - vgtu; + + count_trailing_zeros (c, t); + /* We have c <= GMP_LIMB_BITS - 2 here, so that + + ulimb >>= (c + 1); + + would be safe. But unlike the addition c + 1, a separate + shift by 1 is independent of c, and can be executed in + parallel with count_trailing_zeros. */ + u = (u >> 1) >> c; + } + return (u << 1) + 1; + } + #endif /* !HAVE_NATIVE_mpn_gcd_11 */ diff -Nprc2 gmp-gcd_11.4c6ae64fa612/mpn/generic/gcd_1.c gmp-gcd_11/mpn/generic/gcd_1.c *** gmp-gcd_11.4c6ae64fa612/mpn/generic/gcd_1.c 2019-08-05 11:07:24.000000000 +0200 --- gmp-gcd_11/mpn/generic/gcd_1.c 2019-08-05 22:10:48.042277000 +0200 *************** mpn_gcd_1 (mp_srcptr up, mp_size_t size, *** 71,75 **** count_trailing_zeros (c, ulimb); ! ulimb = (ulimb >> 1) >> c; } else --- 71,75 ---- count_trailing_zeros (c, ulimb); ! ulimb >>= c; } else *************** mpn_gcd_1 (mp_srcptr up, mp_size_t size, *** 93,138 **** count_trailing_zeros (c, ulimb); ! ulimb = (ulimb >> 1) >> c; } - else - { - ASSERT (ulimb & 1); - ulimb >>= 1; - } - } - - ASSERT (vlimb & 1); - vlimb >>= 1; - - /* In this loop, we represent the odd numbers ulimb and vlimb - without the redundant least significant one bit. This reduction - in size by one bit ensures that the high bit of t, below, is set - if and only if vlimb > ulimb. */ - while (ulimb != vlimb) - { - mp_limb_t t; - mp_limb_t vgtu; - - t = ulimb - vlimb; - vgtu = LIMB_HIGHBIT_TO_MASK (t); - - /* v <-- min (u, v) */ - vlimb += (vgtu & t); - - /* u <-- |u - v| */ - ulimb = (t ^ vgtu) - vgtu; - - count_trailing_zeros (c, t); - /* We have c <= GMP_LIMB_BITS - 2 here, so that - - ulimb >>= (c + 1); - - would be safe. But unlike the addition c + 1, a separate - shift by 1 is independent of c, and can be executed in - parallel with count_trailing_zeros. */ - ulimb = (ulimb >> 1) >> c; } ! vlimb = (vlimb << 1) | 1; done: --- 93,101 ---- count_trailing_zeros (c, ulimb); ! ulimb >>= c; } } ! vlimb = mpn_gcd_11 (ulimb, vlimb); done:
Seems to pass existing tests/mpz/t_gcd_ui. Regards, /Niels PS. It seems I tried something similar back in 2014, but that was before the cleanup where we deleted unused gcd_1 code variants and tricky gotos. -- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance.
_______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel