Ciao,
Il Dom, 1 Settembre 2019 6:35 pm, Torbjörn Granlund ha scritto:
> "Marco Bodrato" <bodr...@mail.dm.unipi.it> writes:
> My code: 1.85 bits/iteration
> Your code: 2.60 bits/iteration
> It is worth remembering the table from my experiments with single-limb
> numbers:
> binary tab64 : 479879082 0.401 2.495 40
> binary tab256 : 456126047 0.381 2.625 40
> binary tab1024 : 434483855 0.363 2.755 36
> The now observed 2.60 bits/iteration is very close to the 2.625
> bits/iteration from the table.
My code uses a 256 table... but extends it to a 1024 one...
What does the "binary tab###" single-limb implementation exactly do?
> (I looked briefly at your code, and don't understand it. I haven't had
> time for a proper look yet.)
Yes, storing some unused bits in the table was not a good idea for clarity.
I send it again, I changed some variable names, added some comments and
some #ifdef to experiment the different flavours.
I get:
-DNO_TG_TRICK -DTAB256 -DNLIMBS=10 -DSTAT=1
bits/iteration avg: 2.45
-DTAB256 -DNLIMBS=10 -DSTAT=1
bits/iteration avg: 2.57
-DNO_TG_TRICK -DNLIMBS=10 -DSTAT=1
bits/iteration avg: 2.53
-DNLIMBS=10 -DSTAT=1
bits/iteration avg: 2.60
I added also an "#if 0"-ed bad idea :-)
Ĝis,
m
--
http://bodrato.it/papers/
#include "gmp-impl.h"
#include "longlong.h"
#if defined (__amd64__) && W_TYPE_SIZE == 64
#define CMP_SWAP(ap,bp,n) \
do { \
mp_limb_t tmp; \
__asm__ ("cmp\t%5, %6" "\n\t" \
"mov\t%1, %0" "\n\t" \
"cmovae\t%2, %1" "\n\t" \
"cmovae\t%0, %2" \
: "=&r,&r" (tmp), "=r,r" (ap), "=r,r" (bp) \
: "1,1" (ap), "2,2" (bp), "r,rm" (ap[n-1]), "rm,r" (bp[n-1])); \
} while (0)
#endif
#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n) \
do { \
mp_limb_t m = -(mp_limb_t) (ap[n - 1] < bp[n - 1]); \
mp_limb_t *_tp = ap; \
ap = (mp_ptr) (((size_t) ap & ~m) | ((size_t) bp & m)); \
bp = (mp_ptr) (((size_t) bp & ~m) | ((size_t) _tp & m)); \
} while (0)
#endif
#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n) \
do { \
if (ap[n - 1] < bp[n - 1]) \
{ \
mp_limb_t *_tp = ap; ap = bp; bp = _tp; \
} \
} while (0)
#endif
#ifdef STAT
#define IFSTAT(x) x
#else
#define IFSTAT(x)
#endif
IFSTAT(extern size_t n_iter;)
#if NO_TG_TRICK
#define NEG_MASK (mask ^ 1)
#else
#define NEG_MASK GMP_NUMB_MAX
#endif
void
mpn_gcd_n (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp, mp_size_t n)
{
static unsigned char tabp[256] = {
63, 21, 51, 9, 7, 29, 59, 17, 15, 37, 3, 25, 23, 45, 11, 33,
61, 63, 25, 27, 21, 23, 49, 51, 45, 47, 9, 11, 5, 7, 33, 35,
59, 41, 63, 45, 35, 17, 39, 21, 11, 57, 15, 61, 51, 33, 55, 37,
57, 19, 37, 63, 49, 11, 29, 55, 41, 3, 21, 47, 33, 59, 13, 39,
55, 61, 11, 17, 63, 5, 19, 25, 7, 13, 27, 33, 15, 21, 35, 41,
53, 39, 49, 35, 13, 63, 9, 59, 37, 23, 33, 19, 61, 47, 57, 43,
51, 17, 23, 53, 27, 57, 63, 29, 3, 33, 39, 5, 43, 9, 15, 45,
49, 59, 61, 7, 41, 51, 53, 63, 33, 43, 45, 55, 25, 35, 37, 47,
47, 37, 35, 25, 55, 45, 43, 33, 63, 53, 51, 41, 7, 61, 59, 49,
45, 15, 9, 43, 5, 39, 33, 3, 29, 63, 57, 27, 53, 23, 17, 51,
43, 57, 47, 61, 19, 33, 23, 37, 59, 9, 63, 13, 35, 49, 39, 53,
41, 35, 21, 15, 33, 27, 13, 7, 25, 19, 5, 63, 17, 11, 61, 55,
39, 13, 59, 33, 47, 21, 3, 41, 55, 29, 11, 49, 63, 37, 19, 57,
37, 55, 33, 51, 61, 15, 57, 11, 21, 39, 17, 35, 45, 63, 41, 59,
35, 33, 7, 5, 11, 9, 47, 45, 51, 49, 23, 21, 27, 25, 63, 61,
33, 11, 45, 23, 25, 3, 37, 15, 17, 59, 29, 7, 9, 51, 21, 63
};
#ifdef TAB256
const mp_limb_t mask = (2 << 4) - 2;
#else
const mp_limb_t mask = (2 << 5) - 2;
#endif
MPN_FILL (rp, n, 0);
while (n > 2)
{
mp_limb_t a, b, m, cy, signa, signb, maskm;
int cnt, v2cnt, ucnt, vcnt;
IFSTAT(n_iter++);
CMP_SWAP (up, vp, n);
a = up[0];
b = vp[0];
/* Let m = -tabp[] = GMP_NUMB_MAX ^ (tabp[] & mask) */
#ifdef TAB256
m = NEG_MASK ^ (tabp[((a & mask) << (4 - 1)) + ((b & mask) >> 1)] & mask);
#else
signa = -((a >> 4) & 2);
signb = -((b >> 4) & 2);
/* tabp[- a, b] == - tabp[a, b]; tabp[a, - b] == - tabp[a, b]; tabp[- a, - b] == - - tabp[a, b] */
m = NEG_MASK ^ ((tabp[(((a^signa) & mask) << (4 - 1)) + (((b^signb) & mask) >> 1)]^signa^signb) & mask);
#endif
count_leading_zeros (ucnt, up[n - 1]);
count_leading_zeros (v2cnt, vp[n - 2] | up[n - 1]);
ASSERT (v2cnt <= ucnt); /* We do not care if V is more than one full limb shorter than U*/
/* if vp[n-1] == 0 we will discard the value, otherwise clz(vp[n-1] | 1) = clz(vp[n-1]) */
count_leading_zeros (vcnt, vp[n - 1] | 1);
/* Compute difference in bitsize, GMP_NUMB_BITS if too large. */
cnt = (vp[n - 1] == 0 ? GMP_NUMB_BITS + v2cnt : vcnt) - ucnt;
ASSERT (0 <= cnt && cnt <= GMP_NUMB_BITS);
/* maskm = ((1 << cnt) - 1) | 1 */
maskm = GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1));
m &= maskm;
#if 0 && HAVE_NATIVE_mpn_rsh1add_n
if (UNLIKELY ((((maskm << 1) | 7) & (a+b)) == 0))
{
ASSERT_NOCARRY (mpn_rsh1add_n (up, up, vp, n));
}
else
#endif
{
cy = mpn_submul_1 (up, vp, n, m);
if (UNLIKELY (cy))
mpn_neg (up, up, n);
}
a = up[0];
#ifdef TAB256
ASSERT_ALWAYS ((a & 0x1f & maskm) == 0);
#else
ASSERT_ALWAYS ((a & 0x3f & maskm) == 0);
#endif
if (UNLIKELY (a == 0)) /* lowz */
{
mp_size_t i;
for (i = 1; (a = up[i]) == 0;)
if (++i == n) /* all zeros, we are done. */
{
mpn_copyi (rp, vp, n);
return;
}
if ((a & 1) == 0)
{
int cnt;
count_trailing_zeros (cnt, a);
mpn_rshift (up, up + i, n - i, cnt);
}
else
{
mpn_copyi (up, up + i, n - i);
}
MPN_FILL (up + n - i, i, 0);
}
else
{
count_trailing_zeros (cnt, a);
mpn_rshift (up, up, n, cnt);
}
n -= (up[n - 1] | vp[n - 1]) == 0;
}
mp_double_limb_t g = mpn_gcd_22 (up[1], up[0], vp[1], vp[0]);
rp[0] = g.d0;
rp[1] = g.d1;
}
void
mpn_gcd_33 (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp)
{
mpn_gcd_n (rp, up, vp, 3);
}
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel