ni...@lysator.liu.se (Niels Möller) writes:

> But... I get even better numbers if I keep the old code and just replace
> the div1 function with plain division q = a / b:

Attaching deletion patch. Tried it also on my broadwell machine.
Benchmarking is less reliable there, but seems to give about 10% gcd
speedup in the range 3-10 limbs.

> It should be fairly easy to find out, if we define a HGCD_DIV1_METHOD
> known to tuneup, to select between plain division and the div1 function.

Is there any even easier way to find out on which machines (if any) div1
improves performance? It's still a bit awkward to add tuning of
HGCD_DIV1_METHOD and wait for the nightly builds.

Regards,
/Niels

*** /tmp/extdiff.kabOXn/gmp.228585220bca/mpn/generic/hgcd2.c	2019-09-01 02:13:52.000000000 +0200
--- /home/nisse/hack/gmp/mpn/generic/hgcd2.c	2019-09-03 10:34:01.477332932 +0200
*************** see https://www.gnu.org/licenses/.  */
*** 41,98 ****
     the remainder. */
  
- /* Single-limb division optimized for small quotients. */
- static inline mp_limb_t
- div1 (mp_ptr rp,
-       mp_limb_t n0,
-       mp_limb_t d0)
- {
-   mp_limb_t q = 0;
- 
-   if ((mp_limb_signed_t) n0 < 0)
-     {
-       int cnt;
-       for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
- 	{
- 	  d0 = d0 << 1;
- 	}
- 
-       q = 0;
-       while (cnt)
- 	{
- 	  q <<= 1;
- 	  if (n0 >= d0)
- 	    {
- 	      n0 = n0 - d0;
- 	      q |= 1;
- 	    }
- 	  d0 = d0 >> 1;
- 	  cnt--;
- 	}
-     }
-   else
-     {
-       int cnt;
-       for (cnt = 0; n0 >= d0; cnt++)
- 	{
- 	  d0 = d0 << 1;
- 	}
- 
-       q = 0;
-       while (cnt)
- 	{
- 	  d0 = d0 >> 1;
- 	  q <<= 1;
- 	  if (n0 >= d0)
- 	    {
- 	      n0 = n0 - d0;
- 	      q |= 1;
- 	    }
- 	  cnt--;
- 	}
-     }
-   *rp = n0;
-   return q;
- }
- 
  /* Two-limb division optimized for small quotients.  */
  static inline mp_limb_t
--- 41,44 ----
*************** mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, m
*** 361,367 ****
        else
  	{
! 	  mp_limb_t r;
! 	  mp_limb_t q = div1 (&r, ah, bh);
! 	  ah = r;
  	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
  	    {
--- 307,312 ----
        else
  	{
! 	  mp_limb_t q = ah / bh;
! 	  ah -= q * bh;
  	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
  	    {
*************** mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, m
*** 390,396 ****
        else
  	{
! 	  mp_limb_t r;
! 	  mp_limb_t q = div1 (&r, bh, ah);
! 	  bh = r;
  	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
  	    {
--- 335,340 ----
        else
  	{
! 	  mp_limb_t q = bh / ah;
! 	  bh -= q * ah;
  	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
  	    {
-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to