Ciao,
Il Dom, 1 Settembre 2019 1:30 am, Torbjörn Granlund ha scritto:
> "Marco Bodrato" <bodr...@mail.dm.unipi.it> writes:
> I think that the scaling trick is interesting, and should be always
> used. Does this mean that we should only use submul? Well...
> let's use submul only :-)
>
> We'll need negation, I suppose. Or a rsbmul_1 which we choose wisely.
In an unlikely branch.
> Please try the attached code. It's based on yours.
> I will look at it and test it after some sleep!
Before you wake up, I send you another version. I added two more bits to
the table entries, and started using one with some bit trickery
(exploiting symmetries). It should be able to gain a bit of "speed", but
in the unlikely case of operands of very different sizes only.
Ĝis,
m
--
http://bodrato.it/papers/
#include "gmp-impl.h"
#include "longlong.h"
#if defined (__amd64__) && W_TYPE_SIZE == 64
#define CMP_SWAP(ap,bp,n) \
do { \
mp_limb_t tmp; \
__asm__ ("cmp\t%5, %6" "\n\t" \
"mov\t%1, %0" "\n\t" \
"cmovae\t%2, %1" "\n\t" \
"cmovae\t%0, %2" \
: "=&r,&r" (tmp), "=r,r" (ap), "=r,r" (bp) \
: "1,1" (ap), "2,2" (bp), "r,rm" (ap[n-1]), "rm,r" (bp[n-1])); \
} while (0)
#endif
#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n) \
do { \
mp_limb_t m = -(mp_limb_t) (ap[n - 1] < bp[n - 1]); \
mp_limb_t *_tp = ap; \
ap = (mp_ptr) (((size_t) ap & ~m) | ((size_t) bp & m)); \
bp = (mp_ptr) (((size_t) bp & ~m) | ((size_t) _tp & m)); \
} while (0)
#endif
#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n) \
do { \
if (ap[n - 1] < bp[n - 1]) \
{ \
mp_limb_t *_tp = ap; ap = bp; bp = _tp; \
} \
} while (0)
#endif
void
mpn_gcd_n (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp, mp_size_t n)
{
static unsigned char tabp[256] = {
127, 85, 51, 73, 71, 93, 59, 17, 15, 101, 67, 89, 87, 109, 75, 33,
125, 127, 25, 91, 85, 23, 49, 51, 45, 47, 73, 11, 5, 71, 97, 99,
123, 41, 127, 109, 99, 81, 39, 85, 75, 121, 79, 61, 51, 33, 119, 37,
121, 83, 101, 127, 113, 11, 29, 119, 105, 67, 85, 111, 97, 123, 13, 103,
119, 125, 75, 17, 127, 69, 19, 25, 7, 13, 91, 33, 15, 85, 35, 41,
117, 39, 49, 35, 13, 127, 9, 59, 37, 87, 97, 83, 61, 47, 57, 107,
115, 81, 23, 53, 27, 57, 127, 93, 67, 33, 103, 5, 107, 9, 79, 45,
113, 123, 125, 71, 41, 115, 117, 127, 97, 107, 109, 55, 25, 99, 101, 111,
111, 37, 99, 89, 55, 45, 107, 33, 127, 53, 115, 105, 71, 61, 123, 49,
109, 79, 73, 107, 69, 103, 97, 67, 29, 127, 121, 27, 117, 23, 17, 115,
107, 121, 47, 125, 83, 33, 87, 101, 59, 73, 127, 77, 35, 113, 39, 53,
105, 35, 21, 15, 97, 91, 77, 7, 89, 19, 5, 127, 81, 75, 61, 119,
103, 77, 123, 33, 111, 21, 67, 41, 119, 93, 11, 49, 127, 37, 83, 57,
101, 119, 97, 51, 125, 79, 57, 75, 21, 39, 17, 99, 45, 127, 105, 123,
99, 33, 71, 69, 11, 9, 47, 109, 51, 113, 23, 21, 91, 89, 127, 61,
97, 75, 45, 87, 25, 67, 37, 15, 81, 59, 29, 71, 9, 51, 21, 127,
};
const mp_limb_t mask = (2 << 5) - 2;
MPN_FILL (rp, n, 0);
while (n > 2)
{
mp_limb_t a, b, m, cy, maska, maskb;
int cnt, tcnt, ucnt, vcnt;
CMP_SWAP (up, vp, n);
a = up[0];
maska = -((a >> 4) & 2);
b = vp[0];
maskb = -((b >> 4) & 2);
m = GMP_NUMB_MAX ^ ((tabp[(((a^maska) & mask) << (4 - 1)) + (((b^maskb) & mask) >> 1)]^maska^maskb) & mask);
count_leading_zeros (ucnt, up[n - 1]);
count_leading_zeros (tcnt, vp[n - 2] | up[n - 1]);
count_leading_zeros (vcnt, vp[n - 1] | 1);
cnt = (vp[n - 1] == 0 ? GMP_NUMB_BITS + tcnt : vcnt) - ucnt;
ASSERT (0 <= cnt && cnt <= GMP_NUMB_BITS);
m &= GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1));
cy = mpn_submul_1 (up, vp, n, m);
if (UNLIKELY (cy))
mpn_neg (up, up, n);
a = up[0];
ASSERT_ALWAYS ((a & 0x3f & (GMP_NUMB_MASK >> (GMP_NUMB_BITS - MAX (cnt , 1)))) == 0);
if (UNLIKELY (a == 0))
{
mp_size_t i;
for (i = 1; up[i] == 0;)
if (++i == n)
{
mpn_copyi (rp, vp, n);
return;
}
if ((up[i] & 1) == 0)
{
int cnt;
count_trailing_zeros (cnt, up[i]);
mpn_rshift (up, up + i, n - i, cnt);
}
else
{
mpn_copyi (up, up + i, n - i);
}
MPN_FILL (up + n - i, i, 0);
}
else
{
count_trailing_zeros (cnt, a);
mpn_rshift (up, up, n, cnt);
}
n -= (up[n - 1] | vp[n - 1]) == 0;
}
mp_double_limb_t g = mpn_gcd_22 (up[1], up[0], vp[1], vp[0]);
rp[0] = g.d0;
rp[1] = g.d1;
}
void
mpn_gcd_33 (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp)
{
mpn_gcd_n (rp, up, vp, 3);
}
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel