Re: Small operands gcd improvements

Niels Möller Mon, 05 Aug 2019 13:23:31 -0700

t...@gmplib.org (Torbjörn Granlund) writes:

> ni...@lysator.liu.se (Niels Möller) writes:
>
>   So I see no strong reason to do it one way or the other. Maybe start
>   with requiring both u and v odd, and relax requirements later in case we
>   find any more tangible benefit from doing that?
>
> Makes sense.


Here's a patch to take out gcd_11 to it's own C file, and call it from
gcd_1.c. Needs documentation (if you agree it should be a public
function), and it would be nice with separate tests for this function.

diff -Nprc2 gmp-gcd_11.4c6ae64fa612/configure.ac gmp-gcd_11/configure.ac
*** gmp-gcd_11.4c6ae64fa612/configure.ac	2019-08-05 11:07:24.000000000 +0200
--- gmp-gcd_11/configure.ac	2019-08-05 22:10:48.042277000 +0200
*************** gmp_mpn_functions="$extra_functions
*** 2971,2975 ****
    scan0 scan1 popcount hamdist cmp zero_p				   \
    perfsqr perfpow strongfibo						   \
!   gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step				   \
    gcdext_lehmer								   \
    div_q tdiv_qr jacbase jacobi_2 jacobi get_d				   \
--- 2971,2975 ----
    scan0 scan1 popcount hamdist cmp zero_p				   \
    perfsqr perfpow strongfibo						   \
!   gcd_11 gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step			   \
    gcdext_lehmer								   \
    div_q tdiv_qr jacbase jacobi_2 jacobi get_d				   \
*************** AH_VERBATIM([HAVE_NATIVE],
*** 3560,3563 ****
--- 3560,3564 ----
  #undef HAVE_NATIVE_mpn_divrem_2
  #undef HAVE_NATIVE_mpn_gcd_1
+ #undef HAVE_NATIVE_mpn_gcd_11
  #undef HAVE_NATIVE_mpn_hamdist
  #undef HAVE_NATIVE_mpn_invert_limb
diff -Nprc2 gmp-gcd_11.4c6ae64fa612/gmp-h.in gmp-gcd_11/gmp-h.in
*** gmp-gcd_11.4c6ae64fa612/gmp-h.in	2019-08-05 11:07:24.000000000 +0200
--- gmp-gcd_11/gmp-h.in	2019-08-05 22:10:48.042277000 +0200
*************** __GMP_DECLSPEC mp_limb_t mpn_div_qr_2 (m
*** 1517,1520 ****
--- 1517,1523 ----
  __GMP_DECLSPEC mp_size_t mpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
  
+ #define mpn_gcd_11 __MPN(gcd_11)
+ __GMP_DECLSPEC mp_limb_t mpn_gcd_11 (mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+ 
  #define mpn_gcd_1 __MPN(gcd_1)
  __GMP_DECLSPEC mp_limb_t mpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
diff -Nprc2 gmp-gcd_11.4c6ae64fa612/mpn/generic/gcd_11.c gmp-gcd_11/mpn/generic/gcd_11.c
*** gmp-gcd_11.4c6ae64fa612/mpn/generic/gcd_11.c	1970-01-01 01:00:00.000000000 +0100
--- gmp-gcd_11/mpn/generic/gcd_11.c	2019-08-05 22:10:48.042277000 +0200
***************
*** 0 ****
--- 1,75 ----
+ /* mpn_gcd_11 -- limb greatest common divisor.
+ 
+ Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+ 
+ This file is part of the GNU MP Library.
+ 
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of either:
+ 
+   * the GNU Lesser General Public License as published by the Free
+     Software Foundation; either version 3 of the License, or (at your
+     option) any later version.
+ 
+ or
+ 
+   * the GNU General Public License as published by the Free Software
+     Foundation; either version 2 of the License, or (at your option) any
+     later version.
+ 
+ or both in parallel, as here.
+ 
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+ 
+ You should have received copies of the GNU General Public License and the
+ GNU Lesser General Public License along with the GNU MP Library.  If not,
+ see https://www.gnu.org/licenses/.  */
+ 
+ #include "gmp-impl.h"
+ #include "longlong.h"
+ 
+ #if !HAVE_NATIVE_mpn_gcd_11
+ mp_limb_t
+ mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+ {
+   ASSERT (u & v & 1);
+ 
+   /* In this loop, we represent the odd numbers ulimb and vlimb
+      without the redundant least significant one bit. This reduction
+      in size by one bit ensures that the high bit of t, below, is set
+      if and only if vlimb > ulimb. */
+ 
+   u >>= 1;
+   v >>= 1;
+ 
+   while (u != v)
+     {
+       mp_limb_t t;
+       mp_limb_t vgtu;
+       int c;
+ 
+       t = u - v;
+       vgtu = LIMB_HIGHBIT_TO_MASK (t);
+ 
+       /* v <-- min (u, v) */
+       v += (vgtu & t);
+ 
+       /* u <-- |u - v| */
+       u = (t ^ vgtu) - vgtu;
+ 
+       count_trailing_zeros (c, t);
+       /* We have c <= GMP_LIMB_BITS - 2 here, so that
+ 
+ 	   ulimb >>= (c + 1);
+ 
+ 	 would be safe. But unlike the addition c + 1, a separate
+ 	 shift by 1 is independent of c, and can be executed in
+ 	 parallel with count_trailing_zeros. */
+       u = (u >> 1) >> c;
+     }
+   return (u << 1) + 1;
+ }
+ #endif /* !HAVE_NATIVE_mpn_gcd_11 */
diff -Nprc2 gmp-gcd_11.4c6ae64fa612/mpn/generic/gcd_1.c gmp-gcd_11/mpn/generic/gcd_1.c
*** gmp-gcd_11.4c6ae64fa612/mpn/generic/gcd_1.c	2019-08-05 11:07:24.000000000 +0200
--- gmp-gcd_11/mpn/generic/gcd_1.c	2019-08-05 22:10:48.042277000 +0200
*************** mpn_gcd_1 (mp_srcptr up, mp_size_t size,
*** 71,75 ****
  
        count_trailing_zeros (c, ulimb);
!       ulimb = (ulimb >> 1) >> c;
      }
    else
--- 71,75 ----
  
        count_trailing_zeros (c, ulimb);
!       ulimb >>= c;
      }
    else
*************** mpn_gcd_1 (mp_srcptr up, mp_size_t size,
*** 93,138 ****
  
  	  count_trailing_zeros (c, ulimb);
! 	  ulimb = (ulimb >> 1) >> c;
  	}
-       else
- 	{
- 	  ASSERT (ulimb & 1);
- 	  ulimb >>= 1;
- 	}
-     }
- 
-   ASSERT (vlimb & 1);
-   vlimb >>= 1;
- 
-   /* In this loop, we represent the odd numbers ulimb and vlimb
-      without the redundant least significant one bit. This reduction
-      in size by one bit ensures that the high bit of t, below, is set
-      if and only if vlimb > ulimb. */
-   while (ulimb != vlimb)
-     {
-       mp_limb_t t;
-       mp_limb_t vgtu;
- 
-       t = ulimb - vlimb;
-       vgtu = LIMB_HIGHBIT_TO_MASK (t);
- 
-       /* v <-- min (u, v) */
-       vlimb += (vgtu & t);
- 
-       /* u <-- |u - v| */
-       ulimb = (t ^ vgtu) - vgtu;
- 
-       count_trailing_zeros (c, t);
-       /* We have c <= GMP_LIMB_BITS - 2 here, so that
- 
- 	   ulimb >>= (c + 1);
- 
- 	 would be safe. But unlike the addition c + 1, a separate
- 	 shift by 1 is independent of c, and can be executed in
- 	 parallel with count_trailing_zeros. */
-       ulimb = (ulimb >> 1) >> c;
      }
  
!   vlimb = (vlimb << 1) | 1;
  
   done:
--- 93,101 ----
  
  	  count_trailing_zeros (c, ulimb);
! 	  ulimb >>= c;
  	}
      }
  
!   vlimb = mpn_gcd_11 (ulimb, vlimb);
  
   done:

Seems to pass existing tests/mpz/t_gcd_ui.

Regards,
/Niels

PS. It seems I tried something similar back in 2014, but that was before
the cleanup where we deleted unused gcd_1 code variants and tricky
gotos.

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.

_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Re: Small operands gcd improvements

Reply via email to