Ciao,

Il Sab, 31 Agosto 2019 7:07 pm, Torbjörn Granlund ha scritto:
>   > Sounds like the opposite of binary euclid.
>   Sounds more interesting than binary euclid, to me. Because looking at

> I suppose I don't fully understand how to make a table-based binary

Why should you try that if your idea is more interesting? :-)

> the time.  The scaling trick only applies to the submul_1 cases.  The

I think that the scaling trick is interesting, and should be always used.
Does this mean that we should only use submul? Well... let's use submul
only :-)

Please try the attached code. It's based on yours.

I'm not sure if the multiplier is chosen with the best strategy, but it
seems reasonable.

Ĝis,
m

-- 
http://bodrato.it/papers/
#include "gmp-impl.h"
#include "longlong.h"

#if defined (__amd64__) && W_TYPE_SIZE == 64
#define CMP_SWAP(ap,bp,n)						\
  do {									\
    mp_limb_t tmp;							\
    __asm__ ("cmp\t%5, %6"	"\n\t"					\
	     "mov\t%1, %0"	"\n\t"					\
	     "cmovae\t%2, %1"	"\n\t"					\
	     "cmovae\t%0, %2"						\
	     : "=&r,&r" (tmp), "=r,r" (ap), "=r,r" (bp)			\
	     : "1,1" (ap), "2,2" (bp), "r,rm" (ap[n-1]), "rm,r" (bp[n-1])); \
  } while (0)
#endif

#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n)						\
  do {									\
    mp_limb_t m = -(mp_limb_t) (ap[n - 1] < bp[n - 1]);			\
    mp_limb_t *_tp = ap;						\
    ap = (mp_ptr) (((size_t) ap & ~m) | ((size_t) bp & m));		\
    bp = (mp_ptr) (((size_t) bp & ~m) | ((size_t) _tp & m));		\
  } while (0)
#endif

#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n)						\
  do {									\
    if (ap[n - 1] < bp[n - 1])						\
      {									\
	mp_limb_t *_tp = ap; ap = bp; bp = _tp;				\
      }									\
  } while (0)
#endif

void
mpn_gcd_n (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp, mp_size_t n)
{
  static unsigned char tabp[256] = {
 30, 20, 18,  8,  6, 28, 26, 16, 14,  4,  2, 24, 22, 12, 10,  0,
 28, 30, 24, 26, 20, 22, 16, 18, 12, 14,  8, 10,  4,  6,  0,  2,
 26,  8, 30, 12,  2, 16,  6, 20, 10, 24, 14, 28, 18,  0, 22,  4,
 24, 18,  4, 30, 16, 10, 28, 22,  8,  2, 20, 14,  0, 26, 12,  6,
 22, 28, 10, 16, 30,  4, 18, 24,  6, 12, 26,  0, 14, 20,  2,  8,
 20,  6, 16,  2, 12, 30,  8, 26,  4, 22,  0, 18, 28, 14, 24, 10,
 18, 16, 22, 20, 26, 24, 30, 28,  2,  0,  6,  4, 10,  8, 14, 12,
 16, 26, 28,  6,  8, 18, 20, 30,  0, 10, 12, 22, 24,  2,  4, 14,
 14,  4,  2, 24, 22, 12, 10,  0, 30, 20, 18,  8,  6, 28, 26, 16,
 12, 14,  8, 10,  4,  6,  0,  2, 28, 30, 24, 26, 20, 22, 16, 18,
 10, 24, 14, 28, 18,  0, 22,  4, 26,  8, 30, 12,  2, 16,  6, 20,
  8,  2, 20, 14,  0, 26, 12,  6, 24, 18,  4, 30, 16, 10, 28, 22,
  6, 12, 26,  0, 14, 20,  2,  8, 22, 28, 10, 16, 30,  4, 18, 24,
  4, 22,  0, 18, 28, 14, 24, 10, 20,  6, 16,  2, 12, 30,  8, 26,
  2,  0,  6,  4, 10,  8, 14, 12, 18, 16, 22, 20, 26, 24, 30, 28,
  0, 10, 12, 22, 24,  2,  4, 14, 16, 26, 28,  6,  8, 18, 20, 30
  };

  const mp_limb_t mask = (2 << 4) - 2;

  MPN_FILL (rp, n, 0);

  while (n > 2)
    {
      mp_limb_t a, b, m, cy;
      int cnt, tcnt, ucnt, vcnt;

      CMP_SWAP (up, vp, n);

      a = up[0];
      b = vp[0];
      m = GMP_NUMB_MAX ^ tabp[((a & mask) << (4 - 1)) + ((b & mask) >> 1)];

      count_leading_zeros (ucnt, up[n - 1]);
      count_leading_zeros (tcnt, vp[n - 2] | up[n - 1]);
      count_leading_zeros (vcnt, vp[n - 1] | 1);

      cnt = (vp[n - 1] == 0 ? GMP_NUMB_BITS + tcnt : vcnt) - ucnt;
      ASSERT (0 <= cnt && cnt <= GMP_NUMB_BITS);
      m &= GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1));
      cy = mpn_submul_1 (up, vp, n, m);

      if (UNLIKELY (cy))
	mpn_neg (up, up, n);

      a = up[0];
      ASSERT_ALWAYS ((a & 0x1f & (GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1)))) == 0);

      if (UNLIKELY (a == 0))
	{
	  mp_size_t i;
	  for (i = 1; up[i] == 0;)
	    if (++i == n)
	      {
		mpn_copyi (rp, vp, n);
		return;
	      }

	  if ((up[i] & 1) == 0)
	    {
	      int cnt;
	      count_trailing_zeros (cnt, up[i]);
	      mpn_rshift (up, up + i, n - i, cnt);
	    }
	  else
	    {
	      mpn_copyi (up, up + i, n - i);
	    }
	  MPN_FILL (up + n - i, i, 0);
	}
      else
	{
	  count_trailing_zeros (cnt, a);
	  mpn_rshift (up, up, n, cnt);
	}

      n -= (up[n - 1] | vp[n - 1]) == 0;
    }

 mp_double_limb_t g = mpn_gcd_22 (up[1], up[0], vp[1], vp[0]);
 rp[0] = g.d0;
 rp[1] = g.d1;
}

void
mpn_gcd_33 (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp)
{
  mpn_gcd_n (rp, up, vp, 3);
}
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to