This is a continuation of the mpz_prevprime thread but with a better name. Marco was interested in what using dynamically allocating primes would look like. I've attached a couple of screenshots.
"static_benefit" measures the maximum speedup from having a larger prime table. Just above the current threshold (310,000 = 18 bits) the large table gives us a *3x speedup*, and it is slightly faster up to ~30 million (the speedup is less and less and requires more and more entries be added to primegap_small). I implemented a dynamic version (see patch) which gets about half of the gain. With the dynamic version we could improve THRESHOLD from 310,000 to X,000,000, IMHO if we want that 2-3x speedup it's less code (and maybe less final size) to just include 70-200 extra primes in primegap_small which would be faster and require less special code. All of this got me thinking about Marco's speedup version which reused the sieve, I was hoping this would be faster on medium / large ranges, so I tried that out with a larger static table and using their ulong patch (which was a nice insight) but sadly this is also slower (marco.png) I think a good balance would be to add 70 primes and increase SMALL_LIMIT to 1,000,000 This allows us to capture ~1/3 of the potential improvement with relatively minor change. We would need a new constant to differentiate if (nbits / 2 < NUMBER_OF_PRIMES) and ASSERT (i < NUMBER_OF_PRIMES); At the same time this is a relatively minor improvement and I think we're good doing nothing (especially if Marco is getting tired of reviewing, improving, and committing my code) Much thanks to Marco for their quick reviews and insight throughout.
diff -r 805304ca965a mpz/nextprime.c --- a/mpz/nextprime.c Tue Mar 24 23:13:28 2020 +0100 +++ b/mpz/nextprime.c Wed Mar 25 16:17:45 2020 -0700 @@ -78,16 +78,109 @@ static const unsigned char primegap_small[] = { - 2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6, - 2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2, - 4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6, - 12,2,18,6,10 +2, 2, 4, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4, 2, +4, 14, 4, 6, 2, 10, 2, 6, 6, 4, 6, 6, 2, 10, 2, 4, 2, 12, 12, 4, 2, 4, 6, 2, 10, +6, 6, 6, 2, 6, 4, 2, 10, 14, 4, 2, 4, 14, 6, 10, 2, 4, 6, 8, 6, 6, 4, 6, 8, 4, +8, 10, 2, 10, 2, 6, 4, 6, 8, 4, 2, 4, 12, 8, 4, 8, 4, 6, 12, 2, 18, 6, 10, 6, 6, +2, 6, 10, 6, 6, 2, 6, 6, 4, 2, 12, 10, 2, 4, 6, 6, 2, 12, 4, 6, 8, 10, 8, 10, 8, +6, 6, 4, 8, 6, 4, 8, 4, 14, 10, 12, 2, 10, 2, 4, 2, 10, 14, 4, 2, 4, 14, 4, 2, +4, 20, 4, 8, 10, 8, 4, 6, 6, 14, 4, 6, 6, 8, 6, 12, 4, 6, 2, 10, 2, 6, 10, 2, +10, 2, 6, 18, 4, 2, 4, 6, 6, 8, 6, 6, 22, 2, 10, 8, 10, 6, 6, 8, 12, 4, 6, 6, 2, +6, 12, 10, 18, 2, 4, 6, 2, 6, 4, 2, 4, 12, 2, 6, 34, 6, 6, 8, 18, 10, 14, 4, 2, +4, 6, 8, 4, 2, 6, 12, 10, 2, 4, 2, 4, 6, 12, 12, 8, 12, 6, 4, 6, 8, 4, 8, 4, 14, +4, 6, 2, 4, 6, 2, 6, 10, 20, 6, 4, 2, 24, 4, 2, 10, 12, 2, 10, 8, 6, 6, 6, 18, +6, 4, 2, 12, 10, 12, 8, 16, 14, 6, 4, 2, 4, 2, 10, 12, 6, 6, 18, 2, 16, 2, 22, +6, 8, 6, 4, 2, 4, 8, 6, 10, 2, 10, 14, 10, 6, 12, 2, 4, 2, 10, 12, 2, 16, 2, 6, +4, 2, 10, 8, 18, 24, 4, 6, 8, 16, 2, 4, 8, 16, 2, 4, 8, 6, 6, 4, 12, 2, 22, 6, +2, 6, 4, 6, 14, 6, 4, 2, 6, 4, 6, 12, 6, 6, 14, 4, 6, 12, 8, 6, 4, 26, 18, 10, +8, 4, 6, 2, 6, 22, 12, 2, 16, 8, 4, 12, 14, 10, 2, 4, 8, 6, 6, 4, 2, 4, 6, 8, 4, +2, 6, 10, 2, 10, 8, 4, 14, 10, 12, 2, 6, 4, 2, 16, 14, 4, 6, 8, 6, 4, 18, 8, 10, +6, 6, 8, 10, 12, 14, 4, 6, 6, 2, 28, 2, 10, 8, 4, 14, 4, 8, 12, 6, 12, 4, 6, 20, +10, 2, 16, 26, 4, 2, 12, 6, 4, 12, 6, 8, 4, 8, 22, 2, 4, 2, 12, 28, 2, 6, 6, 6, +4, 6, 2, 12, 4, 12, 2, 10, 2, 16, 2, 16, 6, 20, 16, 8, 4, 2, 4, 2, 22, 8, 12, 6, +10, 2, 4, 6, 2, 6, 10, 2, 12, 10, 2, 10, 14, 6, 4, 6, 8, 6, 6, 16, 12, 2, 4, 14, +6, 4, 8, 10, 8, 6, 6, 22, 6, 2, 10, 14, 4, 6, 18, 2, 10, 14, 4, 2, 10, 14, 4, 8, +18, 4, 6, 2, 4, 6, 2, 12, 4, 20, 22, 12, 2, 4, 6, 6, 2, 6, 22, 2, 6, 16, 6, 12, +2, 6, 12, 16, 2, 4, 6, 14, 4, 2, 18, 24, 10, 6, 2, 10, 2, 10, 2, 10, 6, 2, 10, +2, 10, 6, 8, 30, 10, 2, 10, 8, 6, 10, 18, 6, 12, 12, 2, 18, 6, 4, 6, 6, 18, 2, +10, 14, 6, 4, 2, 4, 24, 2, 12, 6, 16, 8, 6, 6, 18, 16, 2, 4, 6, 2, 6, 6, 10, 6, +12, 12, 18, 2, 6, 4, 18, 8, 24, 4, 2, 4, 6, 2, 12, 4, 14, 30, 10, 6, 12, 14, 6, +10, 12, 2, 4, 6, 8, 6, 10, 2, 4, 14, 6, 6, 4, 6, 2, 10, 2, 16, 12, 8, 18, 4, 6, +12, 2, 6, 6, 6, 28, 6, 14, 4, 8, 10, 8, 12, 18, 4, 2, 4, 24, 12, 6, 2, 16, 6, 6, +14, 10, 14, 4, 30, 6, 6, 6, 8, 6, 4, 2, 12, 6, 4, 2, 6, 22, 6, 2, 4, 18, 2, 4, +12, 2, 6, 4, 26, 6, 6, 4, 8, 10, 32, 16, 2, 6, 4, 2, 4, 2, 10, 14, 6, 4, 8, 10, +6, 20, 4, 2, 6, 30, 4, 8, 10, 6, 6, 8, 6, 12, 4, 6, 2, 6, 4, 6, 2, 10, 2, 16, 6, +20, 4, 12, 14, 28, 6, 20, 4, 18, 8, 6, 4, 6, 14, 6, 6, 10, 2, 10, 12, 8, 10, 2, +10, 8, 12, 10, 24, 2, 4, 8, 6, 4, 8, 18, 10, 6, 6, 2, 6, 10, 12, 2, 10, 6, 6, 6, +8, 6, 10, 6, 2, 6, 6, 6, 10, 8, 24, 6, 22, 2, 18, 4, 8, 10, 30, 8, 18, 4, 2, 10, +6, 2, 6, 4, 18, 8, 12, 18, 16, 6, 2, 12, 6, 10, 2, 10, 2, 6, 10, 14, 4, 24, 2, +16, 2, 10, 2, 10, 20, 4, 2, 4, 8, 16, 6, 6, 2, 12, 16, 8, 4, 6, 30, 2, 10, 2, 6, +4, 6, 6, 8, 6, 4, 12, 6, 8, 12, 4, 14, 12, 10, 24, 6, 12, 6, 2, 22, 8, 18, 10, +6, 14, 4, 2, 6, 10, 8, 6, 4, 6, 30, 14, 10, 2, 12, 10, 2, 16, 2, 18, 24, 18, 6, +16, 18, 6, 2, 18, 4, 6, 2, 10, 8, 10, 6, 6, 8, 4, 6, 2, 10, 2, 12, 4, 6, 6, 2, +12, 4, 14, 18, 4, 6, 20, 4, 8, 6, 4, 8, 4, 14, 6, 4, 14, 12, 4, 2, 30, 4, 24, 6, +6, 12, 12, 14, 6, 4, 2, 4, 18, 6, 12, 8, 6, 4, 12, 2, 12, 30, 16, 2, 6, 22, 14, +6, 10, 12, 6, 2, 4, 8, 10, 6, 6, 24, 14, 6, 4, 8, 12, 18, 10, 2, 10, 2, 4, 6, +20, 6, 4, 14, 4, 2, 4, 14, 6, 12, 24, 10, 6, 8, 10, 2, 30, 4, 6, 2, 12, 4, 14, +6, 34, 12, 8, 6, 10, 2, 4, 20, 10, 8, 16, 2, 10, 14, 4, 2, 12, 6, 16, 6, 8, 4, +8, 4, 6, 8, 6, 6, 12, 6, 4, 6, 6, 8, 18, 4, 20, 4, 12, 2, 10, 6, 2, 10, 12, 2, +4, 20, 6, 30, 6, 4, 8, 10, 12, 6, 2, 28, 2, 6, 4, 2, 16, 12, 2, 6, 10, 8, 24, +12, 6, 18, 6, 4, 14, 6, 4, 12, 8, 6, 12, 4, 6, 12, 6, 12, 2, 16, 20, 4, 2, 10, +18, 8, 4, 14, 4, 2, 6, 22, 6, 14, 6, 6, 10, 6, 2, 10, 2, 4, 2, 22, 2, 4, 6, 6, +12, 6, 14, 10, 12, 6, 8, 4, 36, 14, 12, 6, 4, 6, 2, 12, 6, 12, 16, 2, 10, 8, 22, +2, 12, 6, 4, 6, 18, 2, 12, 6, 4, 12, 8, 6, 12, 4, 6, 12, 6, 2, 12, 12, 4, 14, 6, +16, 6, 2, 10, 8, 18, 6, 34, 2, 28, 2, 22, 6, 2, 10, 12, 2, 6, 4, 8, 22, 6, 2, +10, 8, 4, 6, 8, 4, 12, 18, 12, 20, 4, 6, 6, 8, 4, 2, 16, 12, 2, 10, 8, 10, 2, 4, +6, 14, 12, 22, 8, 28, 2, 4, 20, 4, 2, 4, 14, 10, 12, 2, 12, 16, 2, 28, 8, 22, 8, +4, 6, 6, 14, 4, 8, 12, 6, 6, 4, 20, 4, 18, 2, 12, 6, 4, 6, 14, 18, 10, 8, 10, +32, 6, 10, 6, 6, 2, 6, 16, 6, 2, 12, 6, 28, 2, 10, 8, 16, 6, 8, 6, 10, 24, 20, +10, 2, 10, 2, 12, 4, 6, 20, 4, 2, 12, 18, 10, 2, 10, 2, 4, 20, 16, 26, 4, 8, 6, +4, 12, 6, 8, 12, 12, 6, 4, 8, 22, 2, 16, 14, 10, 6, 12, 12, 14, 6, 4, 20, 4, 12, +6, 2, 6, 6, 16, 8, 22, 2, 28, 8, 6, 4, 20, 4, 12, 24, 20, 4, 8, 10, 2, 16, 2, +12, 12, 34, 2, 4, 6, 12, 6, 6, 8, 6, 4, 2, 6, 24, 4, 20, 10, 6, 6, 14, 4, 6, 6, +2, 12, 6, 10, 2, 10, 6, 20, 4, 26, 4, 2, 6, 22, 2, 24, 4, 6, 2, 4, 6, 24, 6, 8, +4, 2, 34, 6, 8, 16, 12, 2, 10, 2, 10, 6, 8, 4, 8, 12, 22, 6, 14, 4, 26, 4, 2, +12, 10, 8, 4, 8, 12, 4, 14, 6, 16, 6, 8, 4, 6, 6, 8, 6, 10, 12, 2, 6, 6, 16, 8, +6, 6, 12, 10, 2, 6, 18, 4, 6, 6, 6, 12, 18, 8, 6, 10, 8, 18, 4, 14, 6, 18, 10, +8, 10, 12, 2, 6, 12, 12, 36, 4, 6, 8, 4, 6, 2, 4, 18, 12, 6, 8, 6, 6, 4, 18, 2, +4, 2, 24, 4, 6, 6, 14, 30, 6, 4, 6, 12, 6, 20, 4, 8, 4, 8, 6, 6, 4, 30, 2, 10, +12, 8, 10, 8, 24, 6, 12, 4, 14, 4, 6, 2, 28, 14, 16, 2, 12, 6, 4, 20, 10, 6, 6, +6, 8, 10, 12, 14, 10, 14, 16, 14, 10, 14, 6, 16, 6, 8, 6, 16, 20, 10, 2, 6, 4, +2, 4, 12, 2, 10, 2, 6, 22, 6, 2, 4, 18, 8, 10, 8, 22, 2, 10, 18, 14, 4, 2, 4, +18, 2, 4, 6, 8, 10, 2, 30, 4, 30, 2, 10, 2, 18, 4, 18, 6, 14, 10, 2, 4, 20, 36, +6, 4, 6, 14, 4, 20, 10, 14, 22, 6, 2, 30, 12, 10, 18, 2, 4, 14, 6, 22, 18, 2, +12, 6, 4, 8, 4, 8, 6, 10, 2, 12, 18, 10, 14, 16, 14, 4, 6, 6, 2, 6, 4, 2, 28, 2, +28, 6, 2, 4, 6, 14, 4, 12, 14, 16, 14, 4, 6, 8, 6, 4, 6, 6, 6, 8, 4, 8, 4, 14, +16, 8, 6, 4, 12, 8, 16, 2, 10, 8, 4, 6, 26, 6, 10, 8, 4, 6, 12, 14, 30, 4, 14, +22, 8, 12, 4, 6, 8, 10, 6, 14, 10, 6, 2, 10, 12, 12, 14, 6, 6, 18, 10, 6, 8, 18, +4, 6, 2, 6, 10, 2, 10, 8, 6, 6, 10, 2, 18, 10, 2, 12, 4, 6, 8, 10, 12, 14, 12, +4, 8, 10, 6, 6, 20, 4, 14, 16, 14, 10, 8, 10, 12, 2, 18, 6, 12, 10, 12, 2, 4, 2, +12, 6, 4, 8, 4, 44, 4, 2, 4, 2, 10, 12, 6, 6, 14, 4, 6, 6, 6, 8, 6, 36, 18, 4, +6, 2, 12, 6, 6, 6, 4, 14, 22, 12, 2, 18, 10, 6, 26, 24, 4, 2, 4, 2, 4, 14, 4, 6, +6, 8, 16, 12, 2, 42, 4, 2, 4, 24, 6, 6, 2, 18, 4, 14, 6, 28, 18, 14, 6, 10, 12, +2, 6, 12, 30, 6, 4, 6, 6, 14, 4, 2, 24, 4, 6, 6, 26, 10, 18, 6, 8, 6, 6, 30, 4, +12, 12, 2, 16, 2, 6, 4, 12, 18, 2, 6, 4, 26, 12, 6, 12, 4, 24, 24, 12, 6, 2, 12, +28, 8, 4, 6, 12, 2, 18, 6, 4, 6, 6, 20, 16, 2, 6, 6, 18, 10, 6, 2, 4, 8, 6, 6, +24, 16, 6, 8, 10, 6, 14, 22, 8, 16, 6, 2, 12, 4, 2, 22, 8, 18, 34, 2, 6, 18, 4, +6, 6, 8, 10, 8, 18, 6, 4, 2, 4, 8, 16, 2, 12, 12, 6, 18, 4, 6, 6, 6, 2, 6, 12, +10, 20, 12, 18, 4, 6, 2, 16, 2, 10, 14, 4, 30, 2, 10, 12, 2, 24, 6, 16, 8, 10, +2, 12, 22, 6, 2, 16, 20, 10, 2, 12, 12, 18, 10, 12, 6, 2, 10, 2, 6, 10, 18, 2, +12, 6, 4, 6, 2, 24, 28, 2, 4, 2, 10, 2, 16, 12, 8, 22, 2, 6, 4, 2, 10, 6, 20, +12, 10, 8, 12, 6, 6, 6, 4, 18, 2, 4, 12, 18, 2, 12, 6, 4, 2, 16, 12, 12, 14, 4, +8, 18, 4, 12, 14, 6, 6, 4, 8, 6, 4, 20, 12, 10, 14, 4, 2, 16, 2, 12, 30, 4, 6, +24, 20, 24, 10, 8, 12, 10, 12, 6, 12, 12, 6, 8, 16, 14, 6, 4, 6, 36, 20, 10, 30, +12, 2, 4, 2, 28, 12, 14, 6, 22, 8, 4, 18, 6, 14, 18, 4, 6, 2, 6, 34, 18, 2, 16, +6, 18, 2, 24, 4, 2, 6, 12, 6, 12, 10, 8, 6, 16, 12, 8, 10, 14, 40, 6, 2, 6, 4, +12, 14, 4, 2, 4, 2, 4, 8, 6, 10, 6, 6, 2, 6, 6, 6, 12, 6, 24, 10, 2, 10, 6, 12, +6, 6, 14, 6, 6, 52, 20, 6, 10, 2, 10, 8, 10, 12, 12, 2, 6, 4, 14, 16, 8, 12, 6, +22, 2, 10, 8, 6, 22, 2, 22, 6, 8, 10, 12, 12, 2, 10, 6, 12, 2, 4 }; #define NUMBER_OF_PRIMES 100 #define LAST_PRIME 557 /* NP_SMALL_LIMIT = prevprime (LAST_PRIME ^ 2) */ -#define NP_SMALL_LIMIT 310243 +//#define NP_SMALL_LIMIT 310243 +#define NP_SMALL_LIMIT 64000000 static unsigned long calculate_sievelimit(mp_bitcnt_t nbits) { @@ -129,12 +222,50 @@ ASSERT (t > 0); /* Expect t=1 if the operand was smaller.*/ ASSERT (t < NP_SMALL_LIMIT); + /* Find all primes from end of gap to sqrt(t) */ + const unsigned char *primegap = primegap_small; + unsigned char *primegap_tmp; + + TMP_DECL; + TMP_MARK; + + if (t > 310243) + { + // Dynamically size extra primes + + // sqrt of t + 100 + unsigned long sieve_limit; + { + mp_limb_t res; + const mp_limb_t src = t; + mpn_sqrtrem(&res, NULL, &src, 1); + sieve_limit = (unsigned long) res + 100; + } + + mp_limb_t *sieve = TMP_SALLOC_LIMBS (primesieve_size (sieve_limit)); + int prime_limit = gmp_primesieve (sieve, sieve_limit); + + /* Needed to avoid assignment of read-only location */ + primegap_tmp = TMP_ALLOC_TYPE (prime_limit, unsigned char); + unsigned long last_prime; + + int i = 0; + last_prime = 3; + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit), sieve); + primegap_tmp[i++] = prime - last_prime; + last_prime = prime; + LOOP_ON_SIEVE_END; + + /* Both primesieve and prime_limit ignore the first two primes. */ + ASSERT(i == prime_limit); + } + /* Start from next candidate (2 or odd) */ t = (t + 1) | (t > 1); for (; ; t += 2) { unsigned prime = 3; - for (int i = 0; ; prime += primegap_small[i++]) + for (int i = 0; ; prime += primegap[i++]) { unsigned q, r; q = t / prime; @@ -143,7 +274,7 @@ return t; if (r == 0) break; - ASSERT (i < NUMBER_OF_PRIMES); +// ASSERT (i < NUMBER_OF_PRIMES); } } }
_______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel