Ciao,

Il Dom, 1 Settembre 2019 1:30 am, Torbjörn Granlund ha scritto:
> "Marco Bodrato" <bodr...@mail.dm.unipi.it> writes:
>   I think that the scaling trick is interesting, and should be always
>   used. Does this mean that we should only use submul? Well...
>   let's use submul only :-)
>
> We'll need negation, I suppose.  Or a rsbmul_1 which we choose wisely.

In an unlikely branch.

>   Please try the attached code. It's based on yours.

> I will look at it and test it after some sleep!

Before you wake up, I send you another version. I added two more bits to
the table entries, and started using one with some bit trickery
(exploiting symmetries). It should be able to gain a bit of "speed", but
in the unlikely  case of operands of very different sizes only.

Ĝis,
m

-- 
http://bodrato.it/papers/
#include "gmp-impl.h"
#include "longlong.h"

#if defined (__amd64__) && W_TYPE_SIZE == 64
#define CMP_SWAP(ap,bp,n)						\
  do {									\
    mp_limb_t tmp;							\
    __asm__ ("cmp\t%5, %6"	"\n\t"					\
	     "mov\t%1, %0"	"\n\t"					\
	     "cmovae\t%2, %1"	"\n\t"					\
	     "cmovae\t%0, %2"						\
	     : "=&r,&r" (tmp), "=r,r" (ap), "=r,r" (bp)			\
	     : "1,1" (ap), "2,2" (bp), "r,rm" (ap[n-1]), "rm,r" (bp[n-1])); \
  } while (0)
#endif

#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n)						\
  do {									\
    mp_limb_t m = -(mp_limb_t) (ap[n - 1] < bp[n - 1]);			\
    mp_limb_t *_tp = ap;						\
    ap = (mp_ptr) (((size_t) ap & ~m) | ((size_t) bp & m));		\
    bp = (mp_ptr) (((size_t) bp & ~m) | ((size_t) _tp & m));		\
  } while (0)
#endif

#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n)						\
  do {									\
    if (ap[n - 1] < bp[n - 1])						\
      {									\
	mp_limb_t *_tp = ap; ap = bp; bp = _tp;				\
      }									\
  } while (0)
#endif

void
mpn_gcd_n (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp, mp_size_t n)
{
  static unsigned char tabp[256] = {
  127,  85,  51,  73,  71,  93,  59,  17,  15, 101,  67,  89,  87, 109,  75,  33,
  125, 127,  25,  91,  85,  23,  49,  51,  45,  47,  73,  11,   5,  71,  97,  99,
  123,  41, 127, 109,  99,  81,  39,  85,  75, 121,  79,  61,  51,  33, 119,  37,
  121,  83, 101, 127, 113,  11,  29, 119, 105,  67,  85, 111,  97, 123,  13, 103,
  119, 125,  75,  17, 127,  69,  19,  25,   7,  13,  91,  33,  15,  85,  35,  41,
  117,  39,  49,  35,  13, 127,   9,  59,  37,  87,  97,  83,  61,  47,  57, 107,
  115,  81,  23,  53,  27,  57, 127,  93,  67,  33, 103,   5, 107,   9,  79,  45,
  113, 123, 125,  71,  41, 115, 117, 127,  97, 107, 109,  55,  25,  99, 101, 111,
  111,  37,  99,  89,  55,  45, 107,  33, 127,  53, 115, 105,  71,  61, 123,  49,
  109,  79,  73, 107,  69, 103,  97,  67,  29, 127, 121,  27, 117,  23,  17, 115,
  107, 121,  47, 125,  83,  33,  87, 101,  59,  73, 127,  77,  35, 113,  39,  53,
  105,  35,  21,  15,  97,  91,  77,   7,  89,  19,   5, 127,  81,  75,  61, 119,
  103,  77, 123,  33, 111,  21,  67,  41, 119,  93,  11,  49, 127,  37,  83,  57,
  101, 119,  97,  51, 125,  79,  57,  75,  21,  39,  17,  99,  45, 127, 105, 123,
   99,  33,  71,  69,  11,   9,  47, 109,  51, 113,  23,  21,  91,  89, 127,  61,
   97,  75,  45,  87,  25,  67,  37,  15,  81,  59,  29,  71,   9,  51,  21, 127,
  };

  const mp_limb_t mask = (2 << 5) - 2;

  MPN_FILL (rp, n, 0);

  while (n > 2)
    {
      mp_limb_t a, b, m, cy, maska, maskb;
      int cnt, tcnt, ucnt, vcnt;

      CMP_SWAP (up, vp, n);

      a = up[0];
      maska = -((a >> 4) & 2);
      b = vp[0];
      maskb = -((b >> 4) & 2);
      m = GMP_NUMB_MAX ^ ((tabp[(((a^maska) & mask) << (4 - 1)) + (((b^maskb) & mask) >> 1)]^maska^maskb) & mask);

      count_leading_zeros (ucnt, up[n - 1]);
      count_leading_zeros (tcnt, vp[n - 2] | up[n - 1]);
      count_leading_zeros (vcnt, vp[n - 1] | 1);

      cnt = (vp[n - 1] == 0 ? GMP_NUMB_BITS + tcnt : vcnt) - ucnt;
      ASSERT (0 <= cnt && cnt <= GMP_NUMB_BITS);
      m &= GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1));
      cy = mpn_submul_1 (up, vp, n, m);

      if (UNLIKELY (cy))
	mpn_neg (up, up, n);

      a = up[0];
      ASSERT_ALWAYS ((a & 0x3f & (GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1)))) == 0);

      if (UNLIKELY (a == 0))
	{
	  mp_size_t i;
	  for (i = 1; up[i] == 0;)
	    if (++i == n)
	      {
		mpn_copyi (rp, vp, n);
		return;
	      }

	  if ((up[i] & 1) == 0)
	    {
	      int cnt;
	      count_trailing_zeros (cnt, up[i]);
	      mpn_rshift (up, up + i, n - i, cnt);
	    }
	  else
	    {
	      mpn_copyi (up, up + i, n - i);
	    }
	  MPN_FILL (up + n - i, i, 0);
	}
      else
	{
	  count_trailing_zeros (cnt, a);
	  mpn_rshift (up, up, n, cnt);
	}

      n -= (up[n - 1] | vp[n - 1]) == 0;
    }

 mp_double_limb_t g = mpn_gcd_22 (up[1], up[0], vp[1], vp[0]);
 rp[0] = g.d0;
 rp[1] = g.d1;
}

void
mpn_gcd_33 (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp)
{
  mpn_gcd_n (rp, up, vp, 3);
}
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to