I've now tried the u + v variant for gcd_22. I've strived to minimize the number of carry propagations, and I see no obvious microptimizations. And it's slower, time increases from 7.8 cycles per input bit to 9.8, when I measure;
Unclear if it's because the critical path gets longer, or if it's just too many instructions. We get a sequence of depending instructions xor, and, neg, xor, before we get to the sub_ddmmss that cancels some low bits for us. Regards, /Niels mp_double_limb_t mpn_gcd_22 (mp_limb_t u1, mp_limb_t u0, mp_limb_t v1, mp_limb_t v0) { mp_double_limb_t g; ASSERT (u0 & v0 & 1); /* Implicit least significant bit */ u0 = (u0 >> 1) | (u1 << (GMP_LIMB_BITS - 1)); u1 >>= 1; v0 = (v0 >> 1) | (v1 << (GMP_LIMB_BITS - 1)); v1 >>= 1; while (u1 || v1) /* u1 == 0 can happen at most twice per call */ { mp_limb_t x1, x0, vgtu; mp_limb_t t1, t0, use_add; int c; x1 = u1 ^ v1; x0 = u0 ^ v0; /* Incorrectly false if u1 == v1 and v1 > u1, but that's good enough to select the "smallest" of u, v.*/ vgtu = LIMB_HIGHBIT_TO_MASK (u1 - v1); use_add = - (x0 & 1); sub_ddmmss (t1, t0, u1, u0, use_add ^ v1, use_add ^ v0); if (UNLIKELY (t0 == 0)) { if (t1 == 0) { g.d1 = (u1 << 1) | (u0 >> (GMP_LIMB_BITS - 1)); g.d0 = (u0 << 1) | 1; return g; } int c; count_trailing_zeros (c, t1); /* v1 = min (u1, v1) */ v1 ^= (vgtu & x1); /* u0 = |u1 - v1| */ vgtu = ~use_add & LIMB_HIGHBIT_TO_MASK(t1);; u0 = (t1 ^ vgtu) - vgtu; ASSERT (c < GMP_LIMB_BITS - 1); u0 >>= c + 1; u1 = 0; } else { count_trailing_zeros (c, t0); c++; /* V <-- min (U, V). */ v1 ^= vgtu & x1; v0 ^= vgtu & x0; vgtu = ~use_add & LIMB_HIGHBIT_TO_MASK (t1); /* U <-- |U - V| No carry handling needed in this conditional negation, since t0 != 0. */ u0 = (t0 ^ vgtu) - vgtu; u1 = t1 ^ vgtu; if (UNLIKELY (c == GMP_LIMB_BITS)) { u0 = u1; u1 = 0; } else { u0 = (u0 >> c) | (u1 << (GMP_LIMB_BITS - c)); u1 >>= c; } } } while ((v0 | u0) & GMP_LIMB_HIGHBIT) { /* At most two iterations */ mp_limb_t vgtu, d0; int c; sub_ddmmss (vgtu, d0, 0, u0, 0, v0); if (UNLIKELY (d0 == 0)) { g.d1 = u0 >> (GMP_LIMB_BITS - 1); g.d0 = (u0 << 1) | 1; return g; } /* v <-- min (u, v) */ v0 += (vgtu & d0); /* u <-- |u - v| */ u0 = (d0 ^ vgtu) - vgtu; count_trailing_zeros (c, d0); u0 = (u0 >> 1) >> c; } g.d0 = mpn_gcd_11 ((u0 << 1) + 1, (v0 << 1) + 1); g.d1 = 0; return g; } -- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel