This is a continuation of the mpz_prevprime thread but with a better name.

Marco was interested in what using dynamically allocating primes would look
like.
I've attached a couple of screenshots.

"static_benefit" measures the maximum speedup from having a larger prime
table.
Just above the current threshold (310,000 = 18 bits) the large table gives
us a *3x speedup*, and it is slightly faster up to ~30 million (the speedup
is less and less and requires more and more entries be added to
primegap_small).

I implemented a dynamic version (see patch) which gets about half of the
gain.

With the dynamic version we could improve THRESHOLD from 310,000 to
X,000,000, IMHO if we want that 2-3x speedup it's less code (and maybe less
final size) to just include 70-200 extra primes in primegap_small which
would be faster and require less special code.

All of this got me thinking about Marco's speedup version which reused the
sieve, I was hoping this would be faster on medium / large ranges, so I
tried that out with a larger static table and using their ulong patch
(which was a nice insight) but sadly this is also slower (marco.png)

I think a good balance would be to add 70 primes and increase SMALL_LIMIT
to 1,000,000
This allows us to capture ~1/3 of the potential improvement with relatively
minor change.

We would need a new constant to differentiate
if (nbits / 2 < NUMBER_OF_PRIMES)
  and
ASSERT (i < NUMBER_OF_PRIMES);

At the same time this is a relatively minor improvement and I think we're
good doing nothing (especially if Marco is getting tired of reviewing,
improving, and committing my code)

Much thanks to Marco for their quick reviews and insight throughout.
diff -r 805304ca965a mpz/nextprime.c
--- a/mpz/nextprime.c	Tue Mar 24 23:13:28 2020 +0100
+++ b/mpz/nextprime.c	Wed Mar 25 16:17:45 2020 -0700
@@ -78,16 +78,109 @@
 
 static const unsigned char primegap_small[] =
 {
-  2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
-  2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
-  4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6,
-  12,2,18,6,10
+2, 2, 4, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4, 2,
+4, 14, 4, 6, 2, 10, 2, 6, 6, 4, 6, 6, 2, 10, 2, 4, 2, 12, 12, 4, 2, 4, 6, 2, 10,
+6, 6, 6, 2, 6, 4, 2, 10, 14, 4, 2, 4, 14, 6, 10, 2, 4, 6, 8, 6, 6, 4, 6, 8, 4,
+8, 10, 2, 10, 2, 6, 4, 6, 8, 4, 2, 4, 12, 8, 4, 8, 4, 6, 12, 2, 18, 6, 10, 6, 6,
+2, 6, 10, 6, 6, 2, 6, 6, 4, 2, 12, 10, 2, 4, 6, 6, 2, 12, 4, 6, 8, 10, 8, 10, 8,
+6, 6, 4, 8, 6, 4, 8, 4, 14, 10, 12, 2, 10, 2, 4, 2, 10, 14, 4, 2, 4, 14, 4, 2,
+4, 20, 4, 8, 10, 8, 4, 6, 6, 14, 4, 6, 6, 8, 6, 12, 4, 6, 2, 10, 2, 6, 10, 2,
+10, 2, 6, 18, 4, 2, 4, 6, 6, 8, 6, 6, 22, 2, 10, 8, 10, 6, 6, 8, 12, 4, 6, 6, 2,
+6, 12, 10, 18, 2, 4, 6, 2, 6, 4, 2, 4, 12, 2, 6, 34, 6, 6, 8, 18, 10, 14, 4, 2,
+4, 6, 8, 4, 2, 6, 12, 10, 2, 4, 2, 4, 6, 12, 12, 8, 12, 6, 4, 6, 8, 4, 8, 4, 14,
+4, 6, 2, 4, 6, 2, 6, 10, 20, 6, 4, 2, 24, 4, 2, 10, 12, 2, 10, 8, 6, 6, 6, 18,
+6, 4, 2, 12, 10, 12, 8, 16, 14, 6, 4, 2, 4, 2, 10, 12, 6, 6, 18, 2, 16, 2, 22,
+6, 8, 6, 4, 2, 4, 8, 6, 10, 2, 10, 14, 10, 6, 12, 2, 4, 2, 10, 12, 2, 16, 2, 6,
+4, 2, 10, 8, 18, 24, 4, 6, 8, 16, 2, 4, 8, 16, 2, 4, 8, 6, 6, 4, 12, 2, 22, 6,
+2, 6, 4, 6, 14, 6, 4, 2, 6, 4, 6, 12, 6, 6, 14, 4, 6, 12, 8, 6, 4, 26, 18, 10,
+8, 4, 6, 2, 6, 22, 12, 2, 16, 8, 4, 12, 14, 10, 2, 4, 8, 6, 6, 4, 2, 4, 6, 8, 4,
+2, 6, 10, 2, 10, 8, 4, 14, 10, 12, 2, 6, 4, 2, 16, 14, 4, 6, 8, 6, 4, 18, 8, 10,
+6, 6, 8, 10, 12, 14, 4, 6, 6, 2, 28, 2, 10, 8, 4, 14, 4, 8, 12, 6, 12, 4, 6, 20,
+10, 2, 16, 26, 4, 2, 12, 6, 4, 12, 6, 8, 4, 8, 22, 2, 4, 2, 12, 28, 2, 6, 6, 6,
+4, 6, 2, 12, 4, 12, 2, 10, 2, 16, 2, 16, 6, 20, 16, 8, 4, 2, 4, 2, 22, 8, 12, 6,
+10, 2, 4, 6, 2, 6, 10, 2, 12, 10, 2, 10, 14, 6, 4, 6, 8, 6, 6, 16, 12, 2, 4, 14,
+6, 4, 8, 10, 8, 6, 6, 22, 6, 2, 10, 14, 4, 6, 18, 2, 10, 14, 4, 2, 10, 14, 4, 8,
+18, 4, 6, 2, 4, 6, 2, 12, 4, 20, 22, 12, 2, 4, 6, 6, 2, 6, 22, 2, 6, 16, 6, 12,
+2, 6, 12, 16, 2, 4, 6, 14, 4, 2, 18, 24, 10, 6, 2, 10, 2, 10, 2, 10, 6, 2, 10,
+2, 10, 6, 8, 30, 10, 2, 10, 8, 6, 10, 18, 6, 12, 12, 2, 18, 6, 4, 6, 6, 18, 2,
+10, 14, 6, 4, 2, 4, 24, 2, 12, 6, 16, 8, 6, 6, 18, 16, 2, 4, 6, 2, 6, 6, 10, 6,
+12, 12, 18, 2, 6, 4, 18, 8, 24, 4, 2, 4, 6, 2, 12, 4, 14, 30, 10, 6, 12, 14, 6,
+10, 12, 2, 4, 6, 8, 6, 10, 2, 4, 14, 6, 6, 4, 6, 2, 10, 2, 16, 12, 8, 18, 4, 6,
+12, 2, 6, 6, 6, 28, 6, 14, 4, 8, 10, 8, 12, 18, 4, 2, 4, 24, 12, 6, 2, 16, 6, 6,
+14, 10, 14, 4, 30, 6, 6, 6, 8, 6, 4, 2, 12, 6, 4, 2, 6, 22, 6, 2, 4, 18, 2, 4,
+12, 2, 6, 4, 26, 6, 6, 4, 8, 10, 32, 16, 2, 6, 4, 2, 4, 2, 10, 14, 6, 4, 8, 10,
+6, 20, 4, 2, 6, 30, 4, 8, 10, 6, 6, 8, 6, 12, 4, 6, 2, 6, 4, 6, 2, 10, 2, 16, 6,
+20, 4, 12, 14, 28, 6, 20, 4, 18, 8, 6, 4, 6, 14, 6, 6, 10, 2, 10, 12, 8, 10, 2,
+10, 8, 12, 10, 24, 2, 4, 8, 6, 4, 8, 18, 10, 6, 6, 2, 6, 10, 12, 2, 10, 6, 6, 6,
+8, 6, 10, 6, 2, 6, 6, 6, 10, 8, 24, 6, 22, 2, 18, 4, 8, 10, 30, 8, 18, 4, 2, 10,
+6, 2, 6, 4, 18, 8, 12, 18, 16, 6, 2, 12, 6, 10, 2, 10, 2, 6, 10, 14, 4, 24, 2,
+16, 2, 10, 2, 10, 20, 4, 2, 4, 8, 16, 6, 6, 2, 12, 16, 8, 4, 6, 30, 2, 10, 2, 6,
+4, 6, 6, 8, 6, 4, 12, 6, 8, 12, 4, 14, 12, 10, 24, 6, 12, 6, 2, 22, 8, 18, 10,
+6, 14, 4, 2, 6, 10, 8, 6, 4, 6, 30, 14, 10, 2, 12, 10, 2, 16, 2, 18, 24, 18, 6,
+16, 18, 6, 2, 18, 4, 6, 2, 10, 8, 10, 6, 6, 8, 4, 6, 2, 10, 2, 12, 4, 6, 6, 2,
+12, 4, 14, 18, 4, 6, 20, 4, 8, 6, 4, 8, 4, 14, 6, 4, 14, 12, 4, 2, 30, 4, 24, 6,
+6, 12, 12, 14, 6, 4, 2, 4, 18, 6, 12, 8, 6, 4, 12, 2, 12, 30, 16, 2, 6, 22, 14,
+6, 10, 12, 6, 2, 4, 8, 10, 6, 6, 24, 14, 6, 4, 8, 12, 18, 10, 2, 10, 2, 4, 6,
+20, 6, 4, 14, 4, 2, 4, 14, 6, 12, 24, 10, 6, 8, 10, 2, 30, 4, 6, 2, 12, 4, 14,
+6, 34, 12, 8, 6, 10, 2, 4, 20, 10, 8, 16, 2, 10, 14, 4, 2, 12, 6, 16, 6, 8, 4,
+8, 4, 6, 8, 6, 6, 12, 6, 4, 6, 6, 8, 18, 4, 20, 4, 12, 2, 10, 6, 2, 10, 12, 2,
+4, 20, 6, 30, 6, 4, 8, 10, 12, 6, 2, 28, 2, 6, 4, 2, 16, 12, 2, 6, 10, 8, 24,
+12, 6, 18, 6, 4, 14, 6, 4, 12, 8, 6, 12, 4, 6, 12, 6, 12, 2, 16, 20, 4, 2, 10,
+18, 8, 4, 14, 4, 2, 6, 22, 6, 14, 6, 6, 10, 6, 2, 10, 2, 4, 2, 22, 2, 4, 6, 6,
+12, 6, 14, 10, 12, 6, 8, 4, 36, 14, 12, 6, 4, 6, 2, 12, 6, 12, 16, 2, 10, 8, 22,
+2, 12, 6, 4, 6, 18, 2, 12, 6, 4, 12, 8, 6, 12, 4, 6, 12, 6, 2, 12, 12, 4, 14, 6,
+16, 6, 2, 10, 8, 18, 6, 34, 2, 28, 2, 22, 6, 2, 10, 12, 2, 6, 4, 8, 22, 6, 2,
+10, 8, 4, 6, 8, 4, 12, 18, 12, 20, 4, 6, 6, 8, 4, 2, 16, 12, 2, 10, 8, 10, 2, 4,
+6, 14, 12, 22, 8, 28, 2, 4, 20, 4, 2, 4, 14, 10, 12, 2, 12, 16, 2, 28, 8, 22, 8,
+4, 6, 6, 14, 4, 8, 12, 6, 6, 4, 20, 4, 18, 2, 12, 6, 4, 6, 14, 18, 10, 8, 10,
+32, 6, 10, 6, 6, 2, 6, 16, 6, 2, 12, 6, 28, 2, 10, 8, 16, 6, 8, 6, 10, 24, 20,
+10, 2, 10, 2, 12, 4, 6, 20, 4, 2, 12, 18, 10, 2, 10, 2, 4, 20, 16, 26, 4, 8, 6,
+4, 12, 6, 8, 12, 12, 6, 4, 8, 22, 2, 16, 14, 10, 6, 12, 12, 14, 6, 4, 20, 4, 12,
+6, 2, 6, 6, 16, 8, 22, 2, 28, 8, 6, 4, 20, 4, 12, 24, 20, 4, 8, 10, 2, 16, 2,
+12, 12, 34, 2, 4, 6, 12, 6, 6, 8, 6, 4, 2, 6, 24, 4, 20, 10, 6, 6, 14, 4, 6, 6,
+2, 12, 6, 10, 2, 10, 6, 20, 4, 26, 4, 2, 6, 22, 2, 24, 4, 6, 2, 4, 6, 24, 6, 8,
+4, 2, 34, 6, 8, 16, 12, 2, 10, 2, 10, 6, 8, 4, 8, 12, 22, 6, 14, 4, 26, 4, 2,
+12, 10, 8, 4, 8, 12, 4, 14, 6, 16, 6, 8, 4, 6, 6, 8, 6, 10, 12, 2, 6, 6, 16, 8,
+6, 6, 12, 10, 2, 6, 18, 4, 6, 6, 6, 12, 18, 8, 6, 10, 8, 18, 4, 14, 6, 18, 10,
+8, 10, 12, 2, 6, 12, 12, 36, 4, 6, 8, 4, 6, 2, 4, 18, 12, 6, 8, 6, 6, 4, 18, 2,
+4, 2, 24, 4, 6, 6, 14, 30, 6, 4, 6, 12, 6, 20, 4, 8, 4, 8, 6, 6, 4, 30, 2, 10,
+12, 8, 10, 8, 24, 6, 12, 4, 14, 4, 6, 2, 28, 14, 16, 2, 12, 6, 4, 20, 10, 6, 6,
+6, 8, 10, 12, 14, 10, 14, 16, 14, 10, 14, 6, 16, 6, 8, 6, 16, 20, 10, 2, 6, 4,
+2, 4, 12, 2, 10, 2, 6, 22, 6, 2, 4, 18, 8, 10, 8, 22, 2, 10, 18, 14, 4, 2, 4,
+18, 2, 4, 6, 8, 10, 2, 30, 4, 30, 2, 10, 2, 18, 4, 18, 6, 14, 10, 2, 4, 20, 36,
+6, 4, 6, 14, 4, 20, 10, 14, 22, 6, 2, 30, 12, 10, 18, 2, 4, 14, 6, 22, 18, 2,
+12, 6, 4, 8, 4, 8, 6, 10, 2, 12, 18, 10, 14, 16, 14, 4, 6, 6, 2, 6, 4, 2, 28, 2,
+28, 6, 2, 4, 6, 14, 4, 12, 14, 16, 14, 4, 6, 8, 6, 4, 6, 6, 6, 8, 4, 8, 4, 14,
+16, 8, 6, 4, 12, 8, 16, 2, 10, 8, 4, 6, 26, 6, 10, 8, 4, 6, 12, 14, 30, 4, 14,
+22, 8, 12, 4, 6, 8, 10, 6, 14, 10, 6, 2, 10, 12, 12, 14, 6, 6, 18, 10, 6, 8, 18,
+4, 6, 2, 6, 10, 2, 10, 8, 6, 6, 10, 2, 18, 10, 2, 12, 4, 6, 8, 10, 12, 14, 12,
+4, 8, 10, 6, 6, 20, 4, 14, 16, 14, 10, 8, 10, 12, 2, 18, 6, 12, 10, 12, 2, 4, 2,
+12, 6, 4, 8, 4, 44, 4, 2, 4, 2, 10, 12, 6, 6, 14, 4, 6, 6, 6, 8, 6, 36, 18, 4,
+6, 2, 12, 6, 6, 6, 4, 14, 22, 12, 2, 18, 10, 6, 26, 24, 4, 2, 4, 2, 4, 14, 4, 6,
+6, 8, 16, 12, 2, 42, 4, 2, 4, 24, 6, 6, 2, 18, 4, 14, 6, 28, 18, 14, 6, 10, 12,
+2, 6, 12, 30, 6, 4, 6, 6, 14, 4, 2, 24, 4, 6, 6, 26, 10, 18, 6, 8, 6, 6, 30, 4,
+12, 12, 2, 16, 2, 6, 4, 12, 18, 2, 6, 4, 26, 12, 6, 12, 4, 24, 24, 12, 6, 2, 12,
+28, 8, 4, 6, 12, 2, 18, 6, 4, 6, 6, 20, 16, 2, 6, 6, 18, 10, 6, 2, 4, 8, 6, 6,
+24, 16, 6, 8, 10, 6, 14, 22, 8, 16, 6, 2, 12, 4, 2, 22, 8, 18, 34, 2, 6, 18, 4,
+6, 6, 8, 10, 8, 18, 6, 4, 2, 4, 8, 16, 2, 12, 12, 6, 18, 4, 6, 6, 6, 2, 6, 12,
+10, 20, 12, 18, 4, 6, 2, 16, 2, 10, 14, 4, 30, 2, 10, 12, 2, 24, 6, 16, 8, 10,
+2, 12, 22, 6, 2, 16, 20, 10, 2, 12, 12, 18, 10, 12, 6, 2, 10, 2, 6, 10, 18, 2,
+12, 6, 4, 6, 2, 24, 28, 2, 4, 2, 10, 2, 16, 12, 8, 22, 2, 6, 4, 2, 10, 6, 20,
+12, 10, 8, 12, 6, 6, 6, 4, 18, 2, 4, 12, 18, 2, 12, 6, 4, 2, 16, 12, 12, 14, 4,
+8, 18, 4, 12, 14, 6, 6, 4, 8, 6, 4, 20, 12, 10, 14, 4, 2, 16, 2, 12, 30, 4, 6,
+24, 20, 24, 10, 8, 12, 10, 12, 6, 12, 12, 6, 8, 16, 14, 6, 4, 6, 36, 20, 10, 30,
+12, 2, 4, 2, 28, 12, 14, 6, 22, 8, 4, 18, 6, 14, 18, 4, 6, 2, 6, 34, 18, 2, 16,
+6, 18, 2, 24, 4, 2, 6, 12, 6, 12, 10, 8, 6, 16, 12, 8, 10, 14, 40, 6, 2, 6, 4,
+12, 14, 4, 2, 4, 2, 4, 8, 6, 10, 6, 6, 2, 6, 6, 6, 12, 6, 24, 10, 2, 10, 6, 12,
+6, 6, 14, 6, 6, 52, 20, 6, 10, 2, 10, 8, 10, 12, 12, 2, 6, 4, 14, 16, 8, 12, 6,
+22, 2, 10, 8, 6, 22, 2, 22, 6, 8, 10, 12, 12, 2, 10, 6, 12, 2, 4
 };
 
 #define NUMBER_OF_PRIMES 100
 #define LAST_PRIME 557
 /* NP_SMALL_LIMIT = prevprime (LAST_PRIME ^ 2) */
-#define NP_SMALL_LIMIT 310243
+//#define NP_SMALL_LIMIT 310243
+#define NP_SMALL_LIMIT 64000000
 
 static unsigned long
 calculate_sievelimit(mp_bitcnt_t nbits) {
@@ -129,12 +222,50 @@
   ASSERT (t > 0); /* Expect t=1 if the operand was smaller.*/
   ASSERT (t < NP_SMALL_LIMIT);
 
+  /* Find all primes from end of gap to sqrt(t) */
+  const unsigned char *primegap = primegap_small;
+  unsigned char *primegap_tmp;
+
+  TMP_DECL;
+  TMP_MARK;
+
+  if (t > 310243)
+    {
+      // Dynamically size extra primes
+
+      // sqrt of t + 100
+      unsigned long sieve_limit;
+      {
+        mp_limb_t res;
+        const mp_limb_t src = t;
+        mpn_sqrtrem(&res, NULL, &src, 1);
+        sieve_limit = (unsigned long) res + 100;
+      }
+
+      mp_limb_t *sieve = TMP_SALLOC_LIMBS (primesieve_size (sieve_limit));
+      int prime_limit = gmp_primesieve (sieve, sieve_limit);
+
+      /* Needed to avoid assignment of read-only location */
+      primegap_tmp = TMP_ALLOC_TYPE (prime_limit, unsigned char);
+      unsigned long last_prime;
+
+      int i = 0;
+      last_prime = 3;
+      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit), sieve);
+        primegap_tmp[i++] = prime - last_prime;
+        last_prime = prime;
+      LOOP_ON_SIEVE_END;
+
+      /* Both primesieve and prime_limit ignore the first two primes. */
+      ASSERT(i == prime_limit);
+    }
+
   /* Start from next candidate (2 or odd) */
   t = (t + 1) | (t > 1);
   for (; ; t += 2)
     {
       unsigned prime = 3;
-      for (int i = 0; ; prime += primegap_small[i++])
+      for (int i = 0; ; prime += primegap[i++])
 	{
 	  unsigned q, r;
 	  q = t / prime;
@@ -143,7 +274,7 @@
 	    return t;
 	  if (r == 0)
 	    break;
-	  ASSERT (i < NUMBER_OF_PRIMES);
+//	  ASSERT (i < NUMBER_OF_PRIMES);
 	}
     }
 }
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to