Attached is a patch to move the current (slow) gcd_2 to a separate file, rename
it mpn_gcd_22, change argument and return type, and add a basic test.

Do you agree with the naming in 

typedef struct
{
  mp_limb_t d0, d1;
} mp_double_limb_t;

or should it be something else?

Next would be speed support, and then it's easier to try ways to make it
faster.

Regards,
/Niels

diff -r e8e308deeae5 configure.ac
--- a/configure.ac	Wed Aug 14 01:57:21 2019 +0200
+++ b/configure.ac	Thu Aug 15 21:02:40 2019 +0200
@@ -2970,7 +2970,7 @@ gmp_mpn_functions="$extra_functions					
   rootrem sqrtrem sizeinbase get_str set_str compute_powtab		   \
   scan0 scan1 popcount hamdist cmp zero_p				   \
   perfsqr perfpow strongfibo						   \
-  gcd_11 gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step			   \
+  gcd_11 gcd_22 gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step		   \
   gcdext_lehmer								   \
   div_q tdiv_qr jacbase jacobi_2 jacobi get_d				   \
   matrix22_mul matrix22_mul1_inverse_vector				   \
diff -r e8e308deeae5 gmp-impl.h
--- a/gmp-impl.h	Wed Aug 14 01:57:21 2019 +0200
+++ b/gmp-impl.h	Thu Aug 15 21:02:40 2019 +0200
@@ -3968,6 +3968,14 @@ union double_extract
 #define PP_FIRST_OMITTED 3
 #endif
 
+typedef struct
+{
+  mp_limb_t d0, d1;
+} mp_double_limb_t;
+
+#define mpn_gcd_22 __MPN (gcd_22)
+__GMP_DECLSPEC mp_double_limb_t mpn_gcd_22 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
 /* BIT1 means a result value in bit 1 (second least significant bit), with a
    zero bit representing +1 and a one bit representing -1.  Bits other than
    bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
diff -r e8e308deeae5 mpn/generic/gcd.c
--- a/mpn/generic/gcd.c	Wed Aug 14 01:57:21 2019 +0200
+++ b/mpn/generic/gcd.c	Thu Aug 15 21:02:40 2019 +0200
@@ -76,61 +76,6 @@ gcd_hook (void *p, mp_srcptr gp, mp_size
   ctx->gn = gn;
 }
 
-#if GMP_NAIL_BITS > 0
-/* Nail supports should be easy, replacing the sub_ddmmss with nails
- * logic. */
-#error Nails not supported.
-#endif
-
-/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
-   Both U and V must be odd. */
-static inline mp_size_t
-gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
-{
-  mp_limb_t u0, u1, v0, v1;
-  mp_size_t gn;
-
-  u0 = up[0];
-  u1 = up[1];
-  v0 = vp[0];
-  v1 = vp[1];
-
-  ASSERT (u0 & 1);
-  ASSERT (v0 & 1);
-
-  /* Check for u0 != v0 needed to ensure that argument to
-   * count_trailing_zeros is non-zero. */
-  while (u1 != v1 && u0 != v0)
-    {
-      unsigned long int r;
-      if (u1 > v1)
-	{
-	  sub_ddmmss (u1, u0, u1, u0, v1, v0);
-	  count_trailing_zeros (r, u0);
-	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
-	  u1 >>= r;
-	}
-      else  /* u1 < v1.  */
-	{
-	  sub_ddmmss (v1, v0, v1, v0, u1, u0);
-	  count_trailing_zeros (r, v0);
-	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
-	  v1 >>= r;
-	}
-    }
-
-  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
-
-  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
-  if (u1 == v1 && u0 == v0)
-    return gn;
-
-  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
-  gp[0] = mpn_gcd_1 (gp, gn, v0);
-
-  return 1;
-}
-
 mp_size_t
 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
 {
@@ -301,8 +246,12 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t
       vp[1] >>= r;
     }
 
-  ctx.gn = gcd_2(gp, up, vp);
-
+  {
+    mp_double_limb_t g = mpn_gcd_22 (up[1], up[0], vp[1], vp[0]);
+    gp[0] = g.d0;
+    gp[1] = g.d1;
+    ctx.gn = 1 + (g.d1 > 0);
+  }
 done:
   TMP_FREE;
   return ctx.gn;
diff -r e8e308deeae5 mpn/generic/gcd_22.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/gcd_22.c	Thu Aug 15 21:02:40 2019 +0200
@@ -0,0 +1,81 @@
+/* mpn_gcd_22 -- double limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nails not supported.
+#endif
+
+mp_double_limb_t
+mpn_gcd_22 (mp_limb_t u1, mp_limb_t u0, mp_limb_t v1, mp_limb_t v0)
+{
+  mp_double_limb_t g;
+  ASSERT (u0 & 1);
+  ASSERT (v0 & 1);
+
+  /* Check for u0 != v0 needed to ensure that argument to
+   * count_trailing_zeros is non-zero. */
+  while (u1 != v1 && u0 != v0)
+    {
+      unsigned long int r;
+      if (u1 > v1)
+	{
+	  sub_ddmmss (u1, u0, u1, u0, v1, v0);
+	  count_trailing_zeros (r, u0);
+	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
+	  u1 >>= r;
+	}
+      else  /* u1 < v1.  */
+	{
+	  sub_ddmmss (v1, v0, v1, v0, u1, u0);
+	  count_trailing_zeros (r, v0);
+	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
+	  v1 >>= r;
+	}
+    }
+
+  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
+  if (u1 == v1 && u0 == v0)
+    {
+      g.d0 = u0; g.d1 = u1;
+      return g;
+    }
+  else {
+    mp_limb_t t[2];
+    t[0] = u0; t[1] = u1;
+
+    v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
+    g.d0 = mpn_gcd_1 (t, 1 + (u1 != 0), v0);
+    g.d1 = 0;
+    return g;
+  }
+}
diff -r e8e308deeae5 tests/mpn/Makefile.am
--- a/tests/mpn/Makefile.am	Wed Aug 14 01:57:21 2019 +0200
+++ b/tests/mpn/Makefile.am	Thu Aug 15 21:02:40 2019 +0200
@@ -29,7 +29,7 @@ check_PROGRAMS = t-asmtype t-aors_1 t-di
   t-toom2-sqr t-toom3-sqr t-toom4-sqr t-toom6-sqr t-toom8-sqr		\
   t-div t-mul t-mullo t-sqrlo t-mulmod_bnm1 t-sqrmod_bnm1 t-mulmid	\
   t-hgcd t-hgcd_appr t-matrix22 t-invert t-bdiv t-fib2m			\
-  t-broot t-brootinv t-minvert t-sizeinbase t-gcd_11
+  t-broot t-brootinv t-minvert t-sizeinbase t-gcd_11 t-gcd_22
 
 EXTRA_DIST = toom-shared.h toom-sqr-shared.h
 
diff -r e8e308deeae5 tests/mpn/t-gcd_22.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/mpn/t-gcd_22.c	Thu Aug 15 21:02:40 2019 +0200
@@ -0,0 +1,84 @@
+/* Test mpn_gcd_22.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef COUNT
+#define COUNT 10000
+#endif
+
+static void
+one_test (mpz_srcptr a, mpz_srcptr b, mpz_srcptr ref)
+{
+  mp_double_limb_t r = mpn_gcd_22 (mpz_getlimbn (a, 1), mpz_getlimbn (a, 0),
+				   mpz_getlimbn (b, 1), mpz_getlimbn (b, 0));
+  if (r.d0 != mpz_getlimbn (ref, 0) || r.d1 != mpz_getlimbn (ref, 1))
+    {
+      gmp_fprintf (stderr,
+		   "gcd_22 (0x%Zx, 0x%Zx) failed, got: g1 = 0x%Mx g0 = %Mx, ref: 0x%Zx\n",
+                   a, b, r.d1, r.d0, ref);
+      abort();
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t a, b, ref;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (ref);
+  for (test = 0; test < count; test++)
+    {
+      mp_limb_t al, bl;
+      mp_bitcnt_t size = 1 + gmp_urandomm_ui(rands, 2*GMP_NUMB_BITS);
+      if (test & 1)
+	{
+	  mpz_urandomb (a, rands, size);
+	  mpz_urandomb (b, rands, size);
+	}
+      else
+	{
+	  mpz_rrandomb (a, rands, size);
+	  mpz_rrandomb (b, rands, size);
+	}
+
+      mpz_setbit (a, 0);
+      mpz_setbit (b, 0);
+      refmpz_gcd (ref, a, b);
+      one_test (a, b, ref);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (ref);
+}
diff -r e8e308deeae5 tests/refmpz.c
--- a/tests/refmpz.c	Wed Aug 14 01:57:21 2019 +0200
+++ b/tests/refmpz.c	Thu Aug 15 21:02:40 2019 +0200
@@ -78,6 +78,52 @@ refmpz_hamdist (mpz_srcptr x, mpz_srcptr
   return ret;
 }
 
+void
+refmpz_gcd (mpz_ptr g, mpz_srcptr a_orig, mpz_srcptr b_orig)
+{
+  mp_bitcnt_t a_twos, b_twos, common_twos;
+  mpz_t a;
+  mpz_t b;
+  mpz_init (a);
+  mpz_init (b);
+  mpz_abs (a, a_orig);
+  mpz_abs (b, b_orig);
+
+  if (mpz_sgn (a) == 0)
+    {
+      mpz_set (g, b);
+      return;
+    }
+  if (mpz_sgn (b) == 0)
+    {
+      mpz_set (g, a);
+      return;
+    }
+  a_twos = mpz_scan1 (a, 0);
+  mpz_tdiv_q_2exp (a, a, a_twos);
+
+  b_twos = mpz_scan1 (b, 0);
+  mpz_tdiv_q_2exp (b, b, b_twos);
+
+  common_twos = MIN(a_twos, b_twos);
+  for (;;)
+    {
+      int c;
+      mp_bitcnt_t twos;
+      c = mpz_cmp (a, b);
+      if (c == 0)
+	break;
+      if (c < 0)
+	mpz_swap (a, b);
+      mpz_sub (a, a, b);
+      twos = mpz_scan1 (a, 0);
+      mpz_tdiv_q_2exp (a, a, twos);
+    }
+  mpz_mul_2exp (g, a, common_twos);
+
+  mpz_clear (a);
+  mpz_clear (b);
+}
 
 /* (0/b), with mpz b; is 1 if b=+/-1, 0 otherwise */
 #define JACOBI_0Z(b)  JACOBI_0LS (PTR(b)[0], SIZ(b))
diff -r e8e308deeae5 tests/tests.h
--- a/tests/tests.h	Wed Aug 14 01:57:21 2019 +0200
+++ b/tests/tests.h	Thu Aug 15 21:02:40 2019 +0200
@@ -356,6 +356,7 @@ void refmpq_sub (mpq_ptr, mpq_srcptr, mp
 
 void refmpz_combit (mpz_ptr, unsigned long);
 unsigned long refmpz_hamdist (mpz_srcptr, mpz_srcptr);
+void refmpz_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
 int refmpz_kronecker (mpz_srcptr, mpz_srcptr);
 int refmpz_jacobi (mpz_srcptr, mpz_srcptr);
 int refmpz_legendre (mpz_srcptr, mpz_srcptr);
-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.

_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to