On Mon, Mar 9, 2020 at 5:03 AM Marco Bodrato <bodr...@mail.dm.unipi.it>
wrote:

> Ciao,
>
> Il 2020-03-09 11:59 Seth Troisi ha scritto:
> > On Mon, Mar 9, 2020 at 2:05 AM Marco Bodrato
> > <bodr...@mail.dm.unipi.it> wrote:
> >
> >> Ciao,
> >>
> >> Il 2020-03-09 02:56 Seth Troisi ha scritto:
>
> > It's highly possible I misunderstand how these macros are supposed to
> > be used.
> > I also dislike the boiler plate of the macros but I didn't like
>
> When I say "that code is messy", I mean my code.
> And you agree :-) those macros are just easy to misunderstand :-/
>
> > If you have suggestions or code pointers for a better pattern I'd
> > appreciate that.
>
> >> Did you try to use the gmp_primesieve function directly?
> >
> > I'm not sure what you're referencing here, I tried with
>
> I'm proposing to get rid of primegap, and use something like the
> following.
> This is just a copy-paste from your code, not a really working sequence!
>
> +      __sieve = TMP_ALLOC_LIMBS (primesieve_size (sieve_limit));
> +      prime_limit_pre = gmp_primesieve(__sieve, sieve_limit);
>
> +  next_mult = TMP_ALLOC_TYPE (prime_limit, unsigned int);
> [...]
> +  /* Invert p for modulo */
> +  mpz_neg(p, p);
>
> [..handle 3 outside the loop]
>
> +    LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit),
> 0, __sieve);
> +      m = mpz_fdiv_ui(p, prime);
> +      /* Only care about odd multiplies (even indexes) */
> +      if (m & 1)
> +        m += prime;
> +
> +      /* mark off any composites in sieve */
> +      for (; m < INCR_LIMIT; m += 2*prime)
> +        sieve[m] = 1;
> +      next_mult[i] = m;
> +    LOOP_ON_SIEVE_END;
> +  mpz_neg(p, p);
>
> [...]
>
> +      /* Sieve next segment */
> [..handle 3]
> +      memset(sieve, 0, INCR_LIMIT);
> +      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit),
> 0, __sieve);
> +          m = next_mult[i] - INCR_LIMIT;
> +          for (; m < INCR_LIMIT; m += prime * 2)
> +            sieve[m] = 1;
> +          next_mult[i] = m;
> +      LOOP_ON_SIEVE_END;
>
When nbits is small (IMHO the most common case) we'd like to be able to
fallback to a constant array which I don't think exist for primesieve
(because gmp_limb_bits isn't constant).
So I lowered the threshold to 150M (50MB) in the future I have a trick that
will reduce memory usage and we can try to increase this limit.


> > Is there a way to unalloc a TMP_ALLOC_LIMBS before TMP_FREE?
>
> No, if you want to unalloc, you must use __GMP_ALLOCATE_FUNC_TYPE,
> __GMP_REALLOCATE_FUNC_TYPE, and __GMP_FREE_FUNC_TYPE.
>
> > Sieve next segment is rare, the average gap is ln(n), if INCR_LIMIT
> > was changed to be a variable it could be set
> > so /*sieve next segment*/ happened on average less than once, and then
>
> Why not using a variable for INCR_LIMIT?
>
It will be another change with more moving parts. I'm hoping to do it after
the large part of this change goes in.

>
> >> In the array char *sieve, you use only the even entries. Maybe you
> >> can reduce its size by half, and remove some "*2" around the code?
> >
> > it's only 4000 entries so I wasn't bothering at this time, but it
>
> I agree.
>
Fixed now.

>
> PS: did you consider mpz_Cdiv_ui, instead of _neg,_Fdiv_ui,_neg ?
>
Nope, this is much cleaner. Thanks.

>
> Ĝis,
> m
>
diff -r bcca14c8a090 mpz/nextprime.c
--- a/mpz/nextprime.c	Thu Mar 05 00:43:26 2020 +0100
+++ b/mpz/nextprime.c	Mon Mar 09 17:12:37 2020 -0700
@@ -30,10 +30,64 @@
 GNU Lesser General Public License along with the GNU MP Library.  If not,
 see https://www.gnu.org/licenses/.  */
 
+#include <string.h>
+#include <sys/time.h>
+
 #include "gmp-impl.h"
 #include "longlong.h"
 
-static const unsigned char primegap[] =
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)     \
+    __max_i = (end);            \
+                \
+    do {              \
+      ++__i;              \
+      if (((sieve)[__index] & __mask) == 0)     \
+  {             \
+          mp_limb_t prime;          \
+    prime = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)    \
+  do {                \
+    mp_limb_t __mask, __index, __max_i, __i;      \
+                \
+    __i = (start)-(off);          \
+    __index = __i / GMP_LIMB_BITS;        \
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);   \
+    __i += (off);           \
+                \
+    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+#define LOOP_ON_SIEVE_STOP          \
+  }             \
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
+      __index += __mask & 1;          \
+    }  while (__i <= __max_i)
+
+#define LOOP_ON_SIEVE_END         \
+    LOOP_ON_SIEVE_STOP;           \
+  } while (0)
+
+
+
+static const unsigned char primegap_small[] =
 {
   2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
   2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
@@ -48,15 +102,17 @@
 void
 mpz_nextprime (mpz_ptr p, mpz_srcptr n)
 {
-  unsigned short *moduli;
-  unsigned long difference;
-  int i;
+  unsigned int *next_mult;
+  char *composite;
+  const unsigned char *primegap;
   unsigned prime_limit;
-  unsigned long prime;
+  unsigned prime;
   mp_size_t pn;
   mp_bitcnt_t nbits;
+  int i, m;
   unsigned incr;
-  TMP_SDECL;
+  unsigned long difference;
+  TMP_DECL;
 
   /* First handle tiny numbers */
   if (mpz_cmp_ui (n, 2) < 0)
@@ -70,59 +126,165 @@
   if (mpz_cmp_ui (p, 7) <= 0)
     return;
 
+  // Temp code for benchmarking.
+      int print = mpz_fdiv_ui(p, 65536) <= 1;
+      struct timeval tval_before, tval_after, tval_result;
+      if (print)
+        gettimeofday(&tval_before, NULL);
+
+  TMP_MARK;
   pn = SIZ(p);
   MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1);
-  if (nbits / 2 >= NUMBER_OF_PRIMES)
-    prime_limit = NUMBER_OF_PRIMES - 1;
+  if (nbits <= 199)
+    {
+      prime_limit = nbits / 2;
+      primegap = primegap_small;
+    }
   else
-    prime_limit = nbits / 2;
+    {
+      mp_limb_t *sieve;
+      unsigned char *primegap_tmp;
+      long sieve_limit;
+      int last_prime;
+
+      // Estimate a good sieve bound. Based on derivative of
+      //   Merten's 3rd theorem * avg gap * cost of mod
+      //      vs
+      //   Cost of PRP test O(N^2.55)
+      if (nbits > 12400)
+        {
+          /* Larger threshold is faster but takes ~O(5*n/ln(n)) memory.
+           * For 33,000 bits limitting to 150M is ~12% slower than using the
+           * optimal 1.5B sieve_limit.
+           */
+          sieve_limit = 150000000;
+        }
+      else
+        {
+          mpz_t tmp;
+          // sieve_limit ~= nbits ^ (5/2) / 124
+          mpz_init (tmp);
+          mpz_ui_pow_ui (tmp, nbits, 5);
+          mpz_root (tmp, tmp, 2);
+          mpz_tdiv_q_ui(tmp, tmp, 124);
+
+          // avoid having a primegap > 255, first occurence: 436,273,009
+          // TODO: break the rare gap larger than this into gaps <= 255
+          sieve_limit = mpz_get_ui(tmp);
+          mpz_clear (tmp);
+        }
+
+      /* Avoid having a primegap > 255, first occurence 436,273,009. */
+      ASSERT( 4000 < sieve_limit && sieve_limit < 436000000 );
+
+      /* sieve numbers up to sieve_limit and save prime count */
+      sieve = TMP_ALLOC_LIMBS (primesieve_size (sieve_limit));
+      prime_limit = gmp_primesieve(sieve, sieve_limit);
+      /* Needed to avoid assignment of read-only location */
+      primegap_tmp = TMP_ALLOC_TYPE (prime_limit, unsigned char);
+      primegap = primegap_tmp;
+
+      i = 0;
+      last_prime = 3;
+      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit), 0, sieve);
+        primegap_tmp[i++] = prime - last_prime;
+        last_prime = prime;
+      LOOP_ON_SIEVE_END;
 
-  TMP_SMARK;
+      /* Both primesieve and prime_limit ignore the first two primes. */
+      ASSERT(i == prime_limit);
+
+      // Temp code for benchmarking.
+          if (print) {
+            gmp_printf("Pi(%d) = %d | ", sieve_limit, prime_limit);
+          }
+    }
+
+  // Temp code for benchmarking.
+      if (print) {
+        gettimeofday(&tval_after, NULL);
+        timersub(&tval_after, &tval_before, &tval_result);
+        float seconds = tval_result.tv_sec;
+        seconds += tval_result.tv_usec / 1e6;
+        gmp_printf("%d bits, %d primes  %.4f ", nbits, prime_limit, seconds);
+      }
+
+  /* Compute next multiple for small odd primes */
+  next_mult = TMP_ALLOC_TYPE (prime_limit, unsigned int);
+
+  // TODO set this based on nbits.
+  #define INCR_LIMIT 0x400
+  composite = TMP_SALLOC_TYPE (INCR_LIMIT, char);
+
+  /* composite[2*i] remembers if p+2*i is a known composite */
+  memset (composite, 0, INCR_LIMIT);
 
-  /* Compute residues modulo small odd primes */
-  moduli = TMP_SALLOC_TYPE (prime_limit, unsigned short);
+  /* Invert p for modulo */
+  prime = 3;
+  for (i = 0; i < prime_limit; i++)
+    {
+      m = mpz_cdiv_ui(p, prime);
+      /* Only care about odd multiplies */
+      if (m & 1)
+        m += prime;
 
+      m >>= 1;
+      /* mark off any composites in sieve */
+      for (; m < INCR_LIMIT; m += prime)
+        composite[m] = 1;
+      next_mult[i] = m;
+      prime += primegap[i];
+    }
+
+  // Temp code for benchmarking.
+      if (print) {
+        gettimeofday(&tval_before, NULL);
+        timersub(&tval_before, &tval_after, &tval_result);
+        float seconds = tval_result.tv_sec;
+        seconds += tval_result.tv_usec / 1e6;
+        gmp_printf(", mod %.4f  | ", seconds);
+      }
+      int tests = 0;
+
+  difference = 0;
   for (;;)
     {
-      /* FIXME: Compute lazily? */
-      prime = 3;
-      for (i = 0; i < prime_limit; i++)
-	{
-	  moduli[i] = mpz_tdiv_ui (p, prime);
-	  prime += primegap[i];
-	}
+      for (incr = 0; incr < INCR_LIMIT; difference += 2, incr += 1)
+        {
+          if (composite[incr])
+            continue;
 
-#define INCR_LIMIT 0x10000	/* deep science */
+          mpz_add_ui (p, p, difference);
+          difference = 0;
+          tests += 1;
 
-      for (difference = incr = 0; incr < INCR_LIMIT; difference += 2)
-	{
-	  /* First check residues */
-	  prime = 3;
-	  for (i = 0; i < prime_limit; i++)
-	    {
-	      unsigned r;
-	      /* FIXME: Reduce moduli + incr and store back, to allow for
-		 division-free reductions.  Alternatively, table primes[]'s
-		 inverses (mod 2^16).  */
-	      r = (moduli[i] + incr) % prime;
-	      prime += primegap[i];
+          /* Miller-Rabin test */
+          if (mpz_millerrabin (p, 25))
+            goto done;
+        }
 
-	      if (r == 0)
-		goto next;
-	    }
-
-	  mpz_add_ui (p, p, difference);
-	  difference = 0;
-
-	  /* Miller-Rabin test */
-	  if (mpz_millerrabin (p, 25))
-	    goto done;
-	next:;
-	  incr += 2;
-	}
-      mpz_add_ui (p, p, difference);
-      difference = 0;
+      /* Sieve next segment */
+      prime = 3;
+      memset (composite, 0, INCR_LIMIT);
+      for (i = 0; i < prime_limit; i++)
+        {
+          m = next_mult[i] - INCR_LIMIT;
+          for (; m < INCR_LIMIT; m += prime)
+            composite[m] = 1;
+          next_mult[i] = m;
+          prime += primegap[i];
+        }
     }
  done:
+  // Temp code for benchmarking.
+      if (print) {
+        gmp_printf("=> %d\n", tests);
+      }
+  TMP_FREE;
   TMP_SFREE;
 }
+
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to