Seth Troisi <brain...@gmail.com> writes: > This was discussed previously in this thread > https://gmplib.org/list-archives/gmp-devel/2019-September/005465.html > TL;DR >95% of time is spent in miller rabin.
I tried out using the same dtab table used in trialdiv.c. On my laptop, it gives a speedup of about 25% for larger sizes. Not sure how to tune for small sizes, but clearly the old code clamping the size of the prime table based on the bit size is better than doing nothing. The computation of all the moduli could be speed up by using ptab, and by using precomputed reciprocals. Not clear to me how much the speed of that part matters. I haven't run any profiling. Before: $ ./tune/speed -s 100-300 -f 1.1 mpz_nextprime overhead 0.000000005 secs, precision 10000 units of 8.34e-10 secs, CPU freq 1198.47 MHz mpz_nextprime 100 0.000134064 110 0.000145867 121 0.000170687 133 0.000301883 146 0.000341107 160 0.000403326 176 0.000451402 193 0.000745043 212 0.000790877 233 0.000896869 256 0.001118979 281 0.001641613 500 0.008096105 1000 0.091219932 After: $ ./tune/speed -s 100-300 -f 1.1 mpz_nextprime overhead 0.000000005 secs, precision 10000 units of 8.14e-10 secs, CPU freq 1228.24 MHz mpz_nextprime 100 0.000212717 110 0.000223992 121 0.000241062 133 0.000355139 146 0.000388422 160 0.000435633 176 0.000477857 193 0.000716578 212 0.000757639 233 0.000847678 256 0.001018164 281 0.001435439 500 0.006692777 1000 0.072001893 Regards, /Niels
/* mpz_nextprime(p,t) - compute the next prime > t and store that in p. Copyright 1999-2001, 2008, 2009, 2012 Free Software Foundation, Inc. Contributed to the GNU project by Niels Möller and Torbjorn Granlund. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of either: * the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. or * the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. or both in parallel, as here. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with the GNU MP Library. If not, see https://www.gnu.org/licenses/. */ #include "gmp-impl.h" #include "longlong.h" #if 1 struct gmp_primes_dtab { mp_limb_t p; mp_limb_t binv; mp_limb_t lim; }; static const struct gmp_primes_dtab dtab[] = { #define WANT_dtab #define P(p,inv,lim) {p, inv,lim} #include "trialdivtab.h" #undef WANT_dtab #undef P }; #define DTAB_SIZE (sizeof (dtab) / sizeof (dtab[0])) void mpz_nextprime (mpz_ptr p, mpz_srcptr n) { unsigned short *moduli; unsigned long difference; int i; unsigned incr; TMP_SDECL; /* First handle tiny numbers */ if (mpz_cmp_ui (n, 2) < 0) { mpz_set_ui (p, 2); return; } mpz_add_ui (p, n, 1); mpz_setbit (p, 0); if (mpz_cmp_ui (p, 7) <= 0) return; if (SIZ(p) == 1) { mp_limb_t pl = PTR(p)[0]; if (pl < SMALLEST_OMITTED_PRIME) { /* Simple linear search */ for (i = 0; i < DTAB_SIZE; i++) if (dtab[i].p >= pl) { PTR(p)[0] = dtab[i].p; return; } PTR(p)[0] = SMALLEST_OMITTED_PRIME; return; } /* FIXME: Could check if pl < SMALLEST_OMITTED_PRIME^2, or generally do single-limb sieving. */ } TMP_SMARK; /* Compute residues modulo small odd primes */ moduli = TMP_SALLOC_TYPE (DTAB_SIZE, unsigned short); for (;;) { /* FIXME: Compute lazily? */ for (i = 0; i < DTAB_SIZE; i++) { /* FIXME: Could speedup using ptab*/ moduli[i] = mpz_tdiv_ui (p, dtab[i].p); } #define INCR_LIMIT 0x10000 /* deep science */ for (difference = incr = 0; incr < INCR_LIMIT; difference += 2) { /* First check divisibility based on prime list */ for (i = 0; i < DTAB_SIZE; i++) { if ((moduli[i] + incr) * dtab[i].binv <= dtab[i].lim) goto next; } mpz_add_ui (p, p, difference); difference = 0; /* Miller-Rabin test */ if (mpz_millerrabin (p, 25)) goto done; next:; incr += 2; } mpz_add_ui (p, p, difference); difference = 0; } done: TMP_SFREE; } #else static const unsigned char primegap[] = { 2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6, 2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2, 4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6, 12,2,18,6,10,6,6,2,6,10,6,6,2,6,6,4,2,12,10,2,4,6,6,2,12,4,6,8,10,8,10,8, 6,6,4,8,6,4,8,4,14,10,12,2,10,2,4,2,10,14,4,2,4,14,4,2,4,20,4,8,10,8,4,6, 6,14,4,6,6,8,6,12 }; #define NUMBER_OF_PRIMES 167 void mpz_nextprime (mpz_ptr p, mpz_srcptr n) { unsigned short *moduli; unsigned long difference; int i; unsigned prime_limit; unsigned long prime; mp_size_t pn; mp_bitcnt_t nbits; unsigned incr; TMP_SDECL; /* First handle tiny numbers */ if (mpz_cmp_ui (n, 2) < 0) { mpz_set_ui (p, 2); return; } mpz_add_ui (p, n, 1); mpz_setbit (p, 0); if (mpz_cmp_ui (p, 7) <= 0) return; pn = SIZ(p); MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1); if (nbits / 2 >= NUMBER_OF_PRIMES) prime_limit = NUMBER_OF_PRIMES - 1; else prime_limit = nbits / 2; TMP_SMARK; /* Compute residues modulo small odd primes */ moduli = TMP_SALLOC_TYPE (prime_limit, unsigned short); for (;;) { /* FIXME: Compute lazily? */ prime = 3; for (i = 0; i < prime_limit; i++) { moduli[i] = mpz_tdiv_ui (p, prime); prime += primegap[i]; } #define INCR_LIMIT 0x10000 /* deep science */ for (difference = incr = 0; incr < INCR_LIMIT; difference += 2) { /* First check residues */ prime = 3; for (i = 0; i < prime_limit; i++) { unsigned r; /* FIXME: Reduce moduli + incr and store back, to allow for division-free reductions. Alternatively, table primes[]'s inverses (mod 2^16). */ r = (moduli[i] + incr) % prime; prime += primegap[i]; if (r == 0) goto next; } mpz_add_ui (p, p, difference); difference = 0; /* Miller-Rabin test */ if (mpz_millerrabin (p, 25)) goto done; next:; incr += 2; } mpz_add_ui (p, p, difference); difference = 0; } done: TMP_SFREE; } #endif
-- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance.
_______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel