On Mon, Mar 9, 2020 at 5:03 AM Marco Bodrato <bodr...@mail.dm.unipi.it> wrote:
> Ciao, > > Il 2020-03-09 11:59 Seth Troisi ha scritto: > > On Mon, Mar 9, 2020 at 2:05 AM Marco Bodrato > > <bodr...@mail.dm.unipi.it> wrote: > > > >> Ciao, > >> > >> Il 2020-03-09 02:56 Seth Troisi ha scritto: > > > It's highly possible I misunderstand how these macros are supposed to > > be used. > > I also dislike the boiler plate of the macros but I didn't like > > When I say "that code is messy", I mean my code. > And you agree :-) those macros are just easy to misunderstand :-/ > > > If you have suggestions or code pointers for a better pattern I'd > > appreciate that. > > >> Did you try to use the gmp_primesieve function directly? > > > > I'm not sure what you're referencing here, I tried with > > I'm proposing to get rid of primegap, and use something like the > following. > This is just a copy-paste from your code, not a really working sequence! > > + __sieve = TMP_ALLOC_LIMBS (primesieve_size (sieve_limit)); > + prime_limit_pre = gmp_primesieve(__sieve, sieve_limit); > > + next_mult = TMP_ALLOC_TYPE (prime_limit, unsigned int); > [...] > + /* Invert p for modulo */ > + mpz_neg(p, p); > > [..handle 3 outside the loop] > > + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit), > 0, __sieve); > + m = mpz_fdiv_ui(p, prime); > + /* Only care about odd multiplies (even indexes) */ > + if (m & 1) > + m += prime; > + > + /* mark off any composites in sieve */ > + for (; m < INCR_LIMIT; m += 2*prime) > + sieve[m] = 1; > + next_mult[i] = m; > + LOOP_ON_SIEVE_END; > + mpz_neg(p, p); > > [...] > > + /* Sieve next segment */ > [..handle 3] > + memset(sieve, 0, INCR_LIMIT); > + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit), > 0, __sieve); > + m = next_mult[i] - INCR_LIMIT; > + for (; m < INCR_LIMIT; m += prime * 2) > + sieve[m] = 1; > + next_mult[i] = m; > + LOOP_ON_SIEVE_END; > When nbits is small (IMHO the most common case) we'd like to be able to fallback to a constant array which I don't think exist for primesieve (because gmp_limb_bits isn't constant). So I lowered the threshold to 150M (50MB) in the future I have a trick that will reduce memory usage and we can try to increase this limit. > > Is there a way to unalloc a TMP_ALLOC_LIMBS before TMP_FREE? > > No, if you want to unalloc, you must use __GMP_ALLOCATE_FUNC_TYPE, > __GMP_REALLOCATE_FUNC_TYPE, and __GMP_FREE_FUNC_TYPE. > > > Sieve next segment is rare, the average gap is ln(n), if INCR_LIMIT > > was changed to be a variable it could be set > > so /*sieve next segment*/ happened on average less than once, and then > > Why not using a variable for INCR_LIMIT? > It will be another change with more moving parts. I'm hoping to do it after the large part of this change goes in. > > >> In the array char *sieve, you use only the even entries. Maybe you > >> can reduce its size by half, and remove some "*2" around the code? > > > > it's only 4000 entries so I wasn't bothering at this time, but it > > I agree. > Fixed now. > > PS: did you consider mpz_Cdiv_ui, instead of _neg,_Fdiv_ui,_neg ? > Nope, this is much cleaner. Thanks. > > Ĝis, > m >
diff -r bcca14c8a090 mpz/nextprime.c --- a/mpz/nextprime.c Thu Mar 05 00:43:26 2020 +0100 +++ b/mpz/nextprime.c Mon Mar 09 17:12:37 2020 -0700 @@ -30,10 +30,64 @@ GNU Lesser General Public License along with the GNU MP Library. If not, see https://www.gnu.org/licenses/. */ +#include <string.h> +#include <sys/time.h> + #include "gmp-impl.h" #include "longlong.h" -static const unsigned char primegap[] = +/*********************************************************/ +/* Section sieve: sieving functions and tools for primes */ +/*********************************************************/ + +static mp_limb_t +id_to_n (mp_limb_t id) { return id*3+1+(id&1); } + +static mp_limb_t +n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } + +static mp_size_t +primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } + + +/*************************************************************/ +/* Section macros: common macros, for swing/fac/bin (&sieve) */ +/*************************************************************/ + +#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ + __max_i = (end); \ + \ + do { \ + ++__i; \ + if (((sieve)[__index] & __mask) == 0) \ + { \ + mp_limb_t prime; \ + prime = id_to_n(__i) + +#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ + do { \ + mp_limb_t __mask, __index, __max_i, __i; \ + \ + __i = (start)-(off); \ + __index = __i / GMP_LIMB_BITS; \ + __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ + __i += (off); \ + \ + LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) + +#define LOOP_ON_SIEVE_STOP \ + } \ + __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ + __index += __mask & 1; \ + } while (__i <= __max_i) + +#define LOOP_ON_SIEVE_END \ + LOOP_ON_SIEVE_STOP; \ + } while (0) + + + +static const unsigned char primegap_small[] = { 2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6, 2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2, @@ -48,15 +102,17 @@ void mpz_nextprime (mpz_ptr p, mpz_srcptr n) { - unsigned short *moduli; - unsigned long difference; - int i; + unsigned int *next_mult; + char *composite; + const unsigned char *primegap; unsigned prime_limit; - unsigned long prime; + unsigned prime; mp_size_t pn; mp_bitcnt_t nbits; + int i, m; unsigned incr; - TMP_SDECL; + unsigned long difference; + TMP_DECL; /* First handle tiny numbers */ if (mpz_cmp_ui (n, 2) < 0) @@ -70,59 +126,165 @@ if (mpz_cmp_ui (p, 7) <= 0) return; + // Temp code for benchmarking. + int print = mpz_fdiv_ui(p, 65536) <= 1; + struct timeval tval_before, tval_after, tval_result; + if (print) + gettimeofday(&tval_before, NULL); + + TMP_MARK; pn = SIZ(p); MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1); - if (nbits / 2 >= NUMBER_OF_PRIMES) - prime_limit = NUMBER_OF_PRIMES - 1; + if (nbits <= 199) + { + prime_limit = nbits / 2; + primegap = primegap_small; + } else - prime_limit = nbits / 2; + { + mp_limb_t *sieve; + unsigned char *primegap_tmp; + long sieve_limit; + int last_prime; + + // Estimate a good sieve bound. Based on derivative of + // Merten's 3rd theorem * avg gap * cost of mod + // vs + // Cost of PRP test O(N^2.55) + if (nbits > 12400) + { + /* Larger threshold is faster but takes ~O(5*n/ln(n)) memory. + * For 33,000 bits limitting to 150M is ~12% slower than using the + * optimal 1.5B sieve_limit. + */ + sieve_limit = 150000000; + } + else + { + mpz_t tmp; + // sieve_limit ~= nbits ^ (5/2) / 124 + mpz_init (tmp); + mpz_ui_pow_ui (tmp, nbits, 5); + mpz_root (tmp, tmp, 2); + mpz_tdiv_q_ui(tmp, tmp, 124); + + // avoid having a primegap > 255, first occurence: 436,273,009 + // TODO: break the rare gap larger than this into gaps <= 255 + sieve_limit = mpz_get_ui(tmp); + mpz_clear (tmp); + } + + /* Avoid having a primegap > 255, first occurence 436,273,009. */ + ASSERT( 4000 < sieve_limit && sieve_limit < 436000000 ); + + /* sieve numbers up to sieve_limit and save prime count */ + sieve = TMP_ALLOC_LIMBS (primesieve_size (sieve_limit)); + prime_limit = gmp_primesieve(sieve, sieve_limit); + /* Needed to avoid assignment of read-only location */ + primegap_tmp = TMP_ALLOC_TYPE (prime_limit, unsigned char); + primegap = primegap_tmp; + + i = 0; + last_prime = 3; + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit), 0, sieve); + primegap_tmp[i++] = prime - last_prime; + last_prime = prime; + LOOP_ON_SIEVE_END; - TMP_SMARK; + /* Both primesieve and prime_limit ignore the first two primes. */ + ASSERT(i == prime_limit); + + // Temp code for benchmarking. + if (print) { + gmp_printf("Pi(%d) = %d | ", sieve_limit, prime_limit); + } + } + + // Temp code for benchmarking. + if (print) { + gettimeofday(&tval_after, NULL); + timersub(&tval_after, &tval_before, &tval_result); + float seconds = tval_result.tv_sec; + seconds += tval_result.tv_usec / 1e6; + gmp_printf("%d bits, %d primes %.4f ", nbits, prime_limit, seconds); + } + + /* Compute next multiple for small odd primes */ + next_mult = TMP_ALLOC_TYPE (prime_limit, unsigned int); + + // TODO set this based on nbits. + #define INCR_LIMIT 0x400 + composite = TMP_SALLOC_TYPE (INCR_LIMIT, char); + + /* composite[2*i] remembers if p+2*i is a known composite */ + memset (composite, 0, INCR_LIMIT); - /* Compute residues modulo small odd primes */ - moduli = TMP_SALLOC_TYPE (prime_limit, unsigned short); + /* Invert p for modulo */ + prime = 3; + for (i = 0; i < prime_limit; i++) + { + m = mpz_cdiv_ui(p, prime); + /* Only care about odd multiplies */ + if (m & 1) + m += prime; + m >>= 1; + /* mark off any composites in sieve */ + for (; m < INCR_LIMIT; m += prime) + composite[m] = 1; + next_mult[i] = m; + prime += primegap[i]; + } + + // Temp code for benchmarking. + if (print) { + gettimeofday(&tval_before, NULL); + timersub(&tval_before, &tval_after, &tval_result); + float seconds = tval_result.tv_sec; + seconds += tval_result.tv_usec / 1e6; + gmp_printf(", mod %.4f | ", seconds); + } + int tests = 0; + + difference = 0; for (;;) { - /* FIXME: Compute lazily? */ - prime = 3; - for (i = 0; i < prime_limit; i++) - { - moduli[i] = mpz_tdiv_ui (p, prime); - prime += primegap[i]; - } + for (incr = 0; incr < INCR_LIMIT; difference += 2, incr += 1) + { + if (composite[incr]) + continue; -#define INCR_LIMIT 0x10000 /* deep science */ + mpz_add_ui (p, p, difference); + difference = 0; + tests += 1; - for (difference = incr = 0; incr < INCR_LIMIT; difference += 2) - { - /* First check residues */ - prime = 3; - for (i = 0; i < prime_limit; i++) - { - unsigned r; - /* FIXME: Reduce moduli + incr and store back, to allow for - division-free reductions. Alternatively, table primes[]'s - inverses (mod 2^16). */ - r = (moduli[i] + incr) % prime; - prime += primegap[i]; + /* Miller-Rabin test */ + if (mpz_millerrabin (p, 25)) + goto done; + } - if (r == 0) - goto next; - } - - mpz_add_ui (p, p, difference); - difference = 0; - - /* Miller-Rabin test */ - if (mpz_millerrabin (p, 25)) - goto done; - next:; - incr += 2; - } - mpz_add_ui (p, p, difference); - difference = 0; + /* Sieve next segment */ + prime = 3; + memset (composite, 0, INCR_LIMIT); + for (i = 0; i < prime_limit; i++) + { + m = next_mult[i] - INCR_LIMIT; + for (; m < INCR_LIMIT; m += prime) + composite[m] = 1; + next_mult[i] = m; + prime += primegap[i]; + } } done: + // Temp code for benchmarking. + if (print) { + gmp_printf("=> %d\n", tests); + } + TMP_FREE; TMP_SFREE; } + +#undef LOOP_ON_SIEVE_END +#undef LOOP_ON_SIEVE_STOP +#undef LOOP_ON_SIEVE_BEGIN +#undef LOOP_ON_SIEVE_CONTINUE
_______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel