Seth Troisi <brain...@gmail.com> writes:

> This was discussed previously in this thread
> https://gmplib.org/list-archives/gmp-devel/2019-September/005465.html
> TL;DR >95% of time is spent in miller rabin.

I tried out using the same dtab table used in trialdiv.c.

On my laptop, it gives a speedup of about 25% for larger sizes. Not sure
how to tune for small sizes, but clearly the old code clamping the size
of the prime table based on the bit size is better than doing nothing.

The computation of all the moduli could be speed up by using ptab, and
by using precomputed reciprocals. Not clear to me how much the speed of
that part matters. I haven't run any profiling.

Before: 

$ ./tune/speed -s 100-300 -f 1.1 mpz_nextprime
overhead 0.000000005 secs, precision 10000 units of 8.34e-10 secs, CPU freq 
1198.47 MHz
        mpz_nextprime
100       0.000134064
110       0.000145867
121       0.000170687
133       0.000301883
146       0.000341107
160       0.000403326
176       0.000451402
193       0.000745043
212       0.000790877
233       0.000896869
256       0.001118979
281       0.001641613

500       0.008096105
1000      0.091219932

After:
$ ./tune/speed -s 100-300 -f 1.1 mpz_nextprime
overhead 0.000000005 secs, precision 10000 units of 8.14e-10 secs, CPU freq 
1228.24 MHz
        mpz_nextprime
100       0.000212717
110       0.000223992
121       0.000241062
133       0.000355139
146       0.000388422
160       0.000435633
176       0.000477857
193       0.000716578
212       0.000757639
233       0.000847678
256       0.001018164
281       0.001435439

500       0.006692777
1000      0.072001893

Regards,
/Niels


/* mpz_nextprime(p,t) - compute the next prime > t and store that in p.

Copyright 1999-2001, 2008, 2009, 2012 Free Software Foundation, Inc.

Contributed to the GNU project by Niels Möller and Torbjorn Granlund.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

#if 1
struct gmp_primes_dtab {
  mp_limb_t p;
  mp_limb_t binv;
  mp_limb_t lim;
};

static const struct gmp_primes_dtab dtab[] =
{
#define WANT_dtab
#define P(p,inv,lim) {p, inv,lim}
#include "trialdivtab.h"
#undef WANT_dtab
#undef P
};

#define DTAB_SIZE (sizeof (dtab) / sizeof (dtab[0]))

void
mpz_nextprime (mpz_ptr p, mpz_srcptr n)
{
  unsigned short *moduli;
  unsigned long difference;
  int i;
  unsigned incr;
  TMP_SDECL;

  /* First handle tiny numbers */
  if (mpz_cmp_ui (n, 2) < 0)
    {
      mpz_set_ui (p, 2);
      return;
    }
  mpz_add_ui (p, n, 1);
  mpz_setbit (p, 0);

  if (mpz_cmp_ui (p, 7) <= 0)
    return;

  if (SIZ(p) == 1) 
    {
      mp_limb_t pl = PTR(p)[0];
      if (pl < SMALLEST_OMITTED_PRIME) 
	{
	  /* Simple linear search */
	  for (i = 0; i < DTAB_SIZE; i++)
	    if (dtab[i].p >= pl) 
	      {
		PTR(p)[0] = dtab[i].p;
		return;
	      }
	  PTR(p)[0] = SMALLEST_OMITTED_PRIME;
	  return;
	}
      /* FIXME: Could check if pl < SMALLEST_OMITTED_PRIME^2, or
	 generally do single-limb sieving. */
    }
  TMP_SMARK;

  /* Compute residues modulo small odd primes */
  moduli = TMP_SALLOC_TYPE (DTAB_SIZE, unsigned short);

  for (;;)
    {
      /* FIXME: Compute lazily? */
      for (i = 0; i < DTAB_SIZE; i++)
	{
	  /* FIXME: Could speedup using ptab*/
	  moduli[i] = mpz_tdiv_ui (p, dtab[i].p);
	}

#define INCR_LIMIT 0x10000	/* deep science */

      for (difference = incr = 0; incr < INCR_LIMIT; difference += 2)
	{
	  /* First check divisibility based on prime list */
	  for (i = 0; i < DTAB_SIZE; i++)
	    {
	      if ((moduli[i] + incr) * dtab[i].binv <= dtab[i].lim)
		goto next;
	    }

	  mpz_add_ui (p, p, difference);
	  difference = 0;

	  /* Miller-Rabin test */
	  if (mpz_millerrabin (p, 25))
	    goto done;
	next:;
	  incr += 2;
	}
      mpz_add_ui (p, p, difference);
      difference = 0;
    }
 done:
  TMP_SFREE;
}
#else
static const unsigned char primegap[] =
{
  2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
  2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
  4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6,
  12,2,18,6,10,6,6,2,6,10,6,6,2,6,6,4,2,12,10,2,4,6,6,2,12,4,6,8,10,8,10,8,
  6,6,4,8,6,4,8,4,14,10,12,2,10,2,4,2,10,14,4,2,4,14,4,2,4,20,4,8,10,8,4,6,
  6,14,4,6,6,8,6,12
};

#define NUMBER_OF_PRIMES 167

void
mpz_nextprime (mpz_ptr p, mpz_srcptr n)
{
  unsigned short *moduli;
  unsigned long difference;
  int i;
  unsigned prime_limit;
  unsigned long prime;
  mp_size_t pn;
  mp_bitcnt_t nbits;
  unsigned incr;
  TMP_SDECL;

  /* First handle tiny numbers */
  if (mpz_cmp_ui (n, 2) < 0)
    {
      mpz_set_ui (p, 2);
      return;
    }
  mpz_add_ui (p, n, 1);
  mpz_setbit (p, 0);

  if (mpz_cmp_ui (p, 7) <= 0)
    return;

  pn = SIZ(p);
  MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1);
  if (nbits / 2 >= NUMBER_OF_PRIMES)
    prime_limit = NUMBER_OF_PRIMES - 1;
  else
    prime_limit = nbits / 2;

  TMP_SMARK;

  /* Compute residues modulo small odd primes */
  moduli = TMP_SALLOC_TYPE (prime_limit, unsigned short);

  for (;;)
    {
      /* FIXME: Compute lazily? */
      prime = 3;
      for (i = 0; i < prime_limit; i++)
	{
	  moduli[i] = mpz_tdiv_ui (p, prime);
	  prime += primegap[i];
	}

#define INCR_LIMIT 0x10000	/* deep science */

      for (difference = incr = 0; incr < INCR_LIMIT; difference += 2)
	{
	  /* First check residues */
	  prime = 3;
	  for (i = 0; i < prime_limit; i++)
	    {
	      unsigned r;
	      /* FIXME: Reduce moduli + incr and store back, to allow for
		 division-free reductions.  Alternatively, table primes[]'s
		 inverses (mod 2^16).  */
	      r = (moduli[i] + incr) % prime;
	      prime += primegap[i];

	      if (r == 0)
		goto next;
	    }

	  mpz_add_ui (p, p, difference);
	  difference = 0;

	  /* Miller-Rabin test */
	  if (mpz_millerrabin (p, 25))
	    goto done;
	next:;
	  incr += 2;
	}
      mpz_add_ui (p, p, difference);
      difference = 0;
    }
 done:
  TMP_SFREE;
}

#endif
-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to