mul_fft, cleaning up some details of the code

Marco Bodrato Sun, 06 Mar 2022 02:15:00 -0800

Ciao,

I looked into the code published by Samuel Vivien.

I realised that in mul_fft there are a lot of _add_1 and _sub_1. Atleast some of them can be easily replaced by _INCR_U or _DECR_U...

Specifically I'd focus into a suspicious piece of code, shared by bothour current code and Vivien's.


The function mpn_mul_fft_decompose has a branch "if (dif > Kl)", that
ends with the following lines:

          if (cy >= 0)
            cy = mpn_add_1 (tmp, tmp, Kl, cy);
          else
            cy = mpn_sub_1 (tmp, tmp, Kl, -cy);

Can those lines be correct? Can the value in cy be used after this codeboth if it contains the carry or if it contains the borrow of theoperation on tmp?


I believe this is an error, I'd change the last line into
        cy = 1 - mpn_sub_1 (tmp, tmp, Kl, -cy - 1);

and I propose the attached patch changing this and some other _1functions.


This is not really needed to solve a bug.

The comment before the mpn_mul_fft_decompose function says "We must havenl <= 2*K*l", this means that we should never have ((dif = nl - Kl) >Kl), and the code in that branch should never be used.According to the coverage analisys, the code is not explored by thetests:

https://gmplib.org/devel/lcov/shell/var/tmp/lcov/gmp/mpn/mul_fft.c.gcov#691
The "bug" is never triggered.

Maybe the branch could be used if someone re-enables mpn_mul_fft_fulland uses it for very unbalanced (more than 4x1) operands?


Ĝis,
m

diff -r 6ff25532f2a4 mpn/generic/mul_fft.c
--- a/mpn/generic/mul_fft.c	Fri Mar 04 10:03:38 2022 +0100
+++ b/mpn/generic/mul_fft.c	Sun Mar 06 09:41:58 2022 +0100
@@ -237,14 +237,14 @@
 
       r[n] = 0;
       /* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
-      cc++;
-      mpn_incr_u (r, cc);
+      ++cc;
+      MPN_INCR_U (r, n + 1, cc);
 
-      rd++;
+      ++rd;
       /* rd might overflow when sh=GMP_NUMB_BITS-1 */
-      cc = (rd == 0) ? 1 : rd;
+      cc = rd + (rd == 0);
       r = r + m + (rd == 0);
-      mpn_incr_u (r, cc);
+      MPN_INCR_U (r, n + 1 - m - (rd == 0), cc);
     }
   else
     {
@@ -284,7 +284,10 @@
       r[n] = -mpn_sub_1 (r + m, r + m, n - m, cc);
       r[n] -= mpn_sub_1 (r + m, r + m, n - m, rd);
       if (r[n] & GMP_LIMB_HIGHBIT)
-	r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
+	{
+	  r[n] = 0;
+	  MPN_INCR_U (r, n + 1, CNST_LIMB(1));
+	}
     }
 }
 
@@ -560,7 +563,9 @@
 	      */
 	      tp[0] += cc;
 	    }
-	  a[n] = mpn_sub_n (a, tp, tpn, n) && mpn_add_1 (a, a, n, CNST_LIMB(1));
+	  cc = mpn_sub_n (a, tp, tpn, n);
+	  a[n] = 0;
+	  MPN_INCR_U (a, n + 1, cc);
 	}
     }
   TMP_FREE;
@@ -657,8 +662,7 @@
     }
 
   /* remains to subtract {ap+n, l} from {rp, n+1} */
-  cc = mpn_sub_n (rp, rp, ap + n, l);
-  rpn -= mpn_sub_1 (rp + l, rp + l, n - l, cc);
+  rpn -= mpn_sub (rp, rp, n, ap + n, l);
   if (rpn < 0) /* necessarily rpn = -1 */
     rpn = mpn_add_1 (rp, rp, n, CNST_LIMB(1));
   return rpn;
@@ -688,7 +692,10 @@
 
       tmp = TMP_BALLOC_LIMBS(Kl + 1);
 
-      if (dif > Kl)
+      tmp[Kl] = 0;
+      /* The comment "We must have nl <= 2*K*l." says that
+	 ((dif = nl - Kl) > Kl) should never happen. */
+      if (UNLIKELY (dif > Kl))
 	{
 	  int subp = 0;
 
@@ -713,16 +720,18 @@
 	  else
 	    cy -= mpn_add (tmp, tmp, Kl, n, dif);
 	  if (cy >= 0)
-	    cy = mpn_add_1 (tmp, tmp, Kl, cy);
+	    MPN_INCR_U (tmp, Kl + 1, cy);
 	  else
-	    cy = mpn_sub_1 (tmp, tmp, Kl, -cy);
+	    {
+	      tmp[Kl] = 1;
+	      MPN_DECR_U (tmp, Kl + 1, -cy - 1);
+	    }
 	}
       else /* dif <= Kl, i.e. nl <= 2 * Kl */
 	{
 	  cy = mpn_sub (tmp, n, Kl, n + Kl, dif);
-	  cy = mpn_add_1 (tmp, tmp, Kl, cy);
+	  MPN_INCR_U (tmp, Kl + 1, cy);
 	}
-      tmp[Kl] = cy;
       nl = Kl + 1;
       n = tmp;
     }
@@ -755,7 +764,7 @@
 
 static mp_limb_t
 mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
-		      mp_ptr *Ap, mp_ptr *Bp, mp_ptr A, mp_ptr B,
+		      mp_ptr *Ap, mp_ptr *Bp, mp_ptr unusedA, mp_ptr B,
 		      mp_size_t nprime, mp_size_t l, mp_size_t Mp,
 		      int **fft_l, mp_ptr T, int sqr)
 {
@@ -797,9 +806,7 @@
 
       j = (K - i) & (K - 1);
 
-      if (mpn_add_n (n, n, Bp[j], nprime + 1))
-	cc += mpn_add_1 (n + nprime + 1, n + nprime + 1,
-			  pla - sh - nprime - 1, CNST_LIMB(1));
+      cc += mpn_add (n, n, pla - sh, Bp[j], nprime + 1);
       T[2 * l] = i + 1; /* T = (i + 1)*2^(2*M) */
       if (mpn_cmp (Bp[j], T, nprime + 1) > 0)
 	{ /* subtract 2^N'+1 */
@@ -825,8 +832,7 @@
 	}
       else
 	{
-	  cc = mpn_sub_1 (p + pla - pl, p + pla - pl, pl, cc);
-	  ASSERT (cc == 0);
+	  MPN_DECR_U (p + pla - pl, pl, cc);
 	}
     }
   else
@@ -918,18 +924,17 @@
 
   A = TMP_BALLOC_LIMBS (K * (nprime + 1));
   Ap = TMP_BALLOC_MP_PTRS (K);
+  Bp = TMP_BALLOC_MP_PTRS (K);
   mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
   if (sqr)
     {
       mp_size_t pla;
       pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
       B = TMP_BALLOC_LIMBS (pla);
-      Bp = TMP_BALLOC_MP_PTRS (K);
     }
   else
     {
       B = TMP_BALLOC_LIMBS (K * (nprime + 1));
-      Bp = TMP_BALLOC_MP_PTRS (K);
       mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
     }
   h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
@@ -1039,6 +1044,6 @@
   ASSERT_MPN_ZERO_P (pad_op + pl - pl3, pl2 + pl3 - pl);
   __GMP_FREE_FUNC_LIMBS (pad_op, pl2);
   /* since the final result has at most pl limbs, no carry out below */
-  mpn_add_1 (op + pl2, op + pl2, pl - pl2, (mp_limb_t) c2);
+  MPN_INCR_U (op + pl2, pl - pl2, (mp_limb_t) c2);
 }
 #endif

_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

mul_fft, cleaning up some details of the code

Reply via email to