Ciao,
Il Sab, 31 Agosto 2019 7:07 pm, Torbjörn Granlund ha scritto:
> > Sounds like the opposite of binary euclid.
> Sounds more interesting than binary euclid, to me. Because looking at
> I suppose I don't fully understand how to make a table-based binary
Why should you try that if your idea is more interesting? :-)
> the time. The scaling trick only applies to the submul_1 cases. The
I think that the scaling trick is interesting, and should be always used.
Does this mean that we should only use submul? Well... let's use submul
only :-)
Please try the attached code. It's based on yours.
I'm not sure if the multiplier is chosen with the best strategy, but it
seems reasonable.
Ĝis,
m
--
http://bodrato.it/papers/
#include "gmp-impl.h"
#include "longlong.h"
#if defined (__amd64__) && W_TYPE_SIZE == 64
#define CMP_SWAP(ap,bp,n) \
do { \
mp_limb_t tmp; \
__asm__ ("cmp\t%5, %6" "\n\t" \
"mov\t%1, %0" "\n\t" \
"cmovae\t%2, %1" "\n\t" \
"cmovae\t%0, %2" \
: "=&r,&r" (tmp), "=r,r" (ap), "=r,r" (bp) \
: "1,1" (ap), "2,2" (bp), "r,rm" (ap[n-1]), "rm,r" (bp[n-1])); \
} while (0)
#endif
#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n) \
do { \
mp_limb_t m = -(mp_limb_t) (ap[n - 1] < bp[n - 1]); \
mp_limb_t *_tp = ap; \
ap = (mp_ptr) (((size_t) ap & ~m) | ((size_t) bp & m)); \
bp = (mp_ptr) (((size_t) bp & ~m) | ((size_t) _tp & m)); \
} while (0)
#endif
#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n) \
do { \
if (ap[n - 1] < bp[n - 1]) \
{ \
mp_limb_t *_tp = ap; ap = bp; bp = _tp; \
} \
} while (0)
#endif
void
mpn_gcd_n (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp, mp_size_t n)
{
static unsigned char tabp[256] = {
30, 20, 18, 8, 6, 28, 26, 16, 14, 4, 2, 24, 22, 12, 10, 0,
28, 30, 24, 26, 20, 22, 16, 18, 12, 14, 8, 10, 4, 6, 0, 2,
26, 8, 30, 12, 2, 16, 6, 20, 10, 24, 14, 28, 18, 0, 22, 4,
24, 18, 4, 30, 16, 10, 28, 22, 8, 2, 20, 14, 0, 26, 12, 6,
22, 28, 10, 16, 30, 4, 18, 24, 6, 12, 26, 0, 14, 20, 2, 8,
20, 6, 16, 2, 12, 30, 8, 26, 4, 22, 0, 18, 28, 14, 24, 10,
18, 16, 22, 20, 26, 24, 30, 28, 2, 0, 6, 4, 10, 8, 14, 12,
16, 26, 28, 6, 8, 18, 20, 30, 0, 10, 12, 22, 24, 2, 4, 14,
14, 4, 2, 24, 22, 12, 10, 0, 30, 20, 18, 8, 6, 28, 26, 16,
12, 14, 8, 10, 4, 6, 0, 2, 28, 30, 24, 26, 20, 22, 16, 18,
10, 24, 14, 28, 18, 0, 22, 4, 26, 8, 30, 12, 2, 16, 6, 20,
8, 2, 20, 14, 0, 26, 12, 6, 24, 18, 4, 30, 16, 10, 28, 22,
6, 12, 26, 0, 14, 20, 2, 8, 22, 28, 10, 16, 30, 4, 18, 24,
4, 22, 0, 18, 28, 14, 24, 10, 20, 6, 16, 2, 12, 30, 8, 26,
2, 0, 6, 4, 10, 8, 14, 12, 18, 16, 22, 20, 26, 24, 30, 28,
0, 10, 12, 22, 24, 2, 4, 14, 16, 26, 28, 6, 8, 18, 20, 30
};
const mp_limb_t mask = (2 << 4) - 2;
MPN_FILL (rp, n, 0);
while (n > 2)
{
mp_limb_t a, b, m, cy;
int cnt, tcnt, ucnt, vcnt;
CMP_SWAP (up, vp, n);
a = up[0];
b = vp[0];
m = GMP_NUMB_MAX ^ tabp[((a & mask) << (4 - 1)) + ((b & mask) >> 1)];
count_leading_zeros (ucnt, up[n - 1]);
count_leading_zeros (tcnt, vp[n - 2] | up[n - 1]);
count_leading_zeros (vcnt, vp[n - 1] | 1);
cnt = (vp[n - 1] == 0 ? GMP_NUMB_BITS + tcnt : vcnt) - ucnt;
ASSERT (0 <= cnt && cnt <= GMP_NUMB_BITS);
m &= GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1));
cy = mpn_submul_1 (up, vp, n, m);
if (UNLIKELY (cy))
mpn_neg (up, up, n);
a = up[0];
ASSERT_ALWAYS ((a & 0x1f & (GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1)))) == 0);
if (UNLIKELY (a == 0))
{
mp_size_t i;
for (i = 1; up[i] == 0;)
if (++i == n)
{
mpn_copyi (rp, vp, n);
return;
}
if ((up[i] & 1) == 0)
{
int cnt;
count_trailing_zeros (cnt, up[i]);
mpn_rshift (up, up + i, n - i, cnt);
}
else
{
mpn_copyi (up, up + i, n - i);
}
MPN_FILL (up + n - i, i, 0);
}
else
{
count_trailing_zeros (cnt, a);
mpn_rshift (up, up, n, cnt);
}
n -= (up[n - 1] | vp[n - 1]) == 0;
}
mp_double_limb_t g = mpn_gcd_22 (up[1], up[0], vp[1], vp[0]);
rp[0] = g.d0;
rp[1] = g.d1;
}
void
mpn_gcd_33 (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp)
{
mpn_gcd_n (rp, up, vp, 3);
}
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel