I had a play with the karatsuba function today. I figured that one
might be able to cut out some overhead by having it complete the last
two iterations of the recursion in one go. I wrote some code which
works for the last two iterations when n/2, at that point, is
divisible by 4. But after fiddling for quite some time to try and find
a way to make the existing code faster with this as a special case, I
have not seen any speedups. Anyhow, here is the code, in case someone
else had some ideas. It obviously only works on systems with
mpn_addadd_n and mpn_addsub_n.

#define MPN_KARA_SUB(pxxx, signxxx, axxx, n2xxx) \
do \
{ \
  i = n2xxx; \
  do \
  { \
         --i; \
         w0 = (axxx)[i]; \
         w1 = (axxx)[n2xxx + i]; \
  } while (w0 == w1 && i != 0); \
  if (w0 < w1) \
  { \
         x = axxx + n2xxx; \
         y = axxx; \
         signxxx = ~signxxx; \
  } else \
  { \
         x = axxx; \
         y = axxx + n2xxx; \
  } \
  mpn_sub_n (pxxx, x, y, n2xxx); \
} while (0)

void
mpn_kara_mul_4n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n,
mp_ptr ws)
{
  /* main vars */

  mp_limb_t w, w0, w1;
  mp_size_t n2;
  mp_srcptr x, y;
  mp_size_t i;
  int sign, ci;

  /* kara 1 vars */
  mp_size_t n21;
  int sign1;

  /* kara 2 vars */
  mp_size_t n22;
  int sign2;

  /* kara 3 vars */
  mp_size_t n23;
  int sign3;

  n2 = n >> 1;
  sign = 0;

  MPN_KARA_SUB(p, sign, a, n2);
  MPN_KARA_SUB(p + n2, sign, b, n2);

  /* Kara 1 */

  //mpn_mul_basecase (ws, p, n2, p + n2, n2);

  n21 = n2 >> 1;
  sign1 = 0;

  MPN_KARA_SUB(ws, sign1, p, n21);
  MPN_KARA_SUB(ws + n21, sign1, p + n2, n21);

  mpn_mul_basecase (ws + n, ws, n21, ws + n21, n21);
  mpn_mul_basecase (ws, p, n21, p + n2, n21);
  mpn_mul_basecase (ws + n2, p + n21, n21, p + n2 + n21, n21);

  if (sign1) w = mpn_addadd_n(ws + n, ws + n2, ws, ws + n, n2);
  else w = mpn_addsub_n(ws + n, ws + n2, ws, ws + n, n2);

  w += mpn_add_n (ws + n21, ws + n21, ws + n, n2);
  MPN_INCR_U (ws + n21 + n2, 2 * n2 - (n21 + n2), w);

  /* Kara 2 */

  //mpn_mul_basecase (p, a, n2, b, n2);

  n22 = n2 >> 1;
  sign2 = 0;

  MPN_KARA_SUB(p, sign2, a, n22);
  MPN_KARA_SUB(p + n22, sign2, b, n22);

  mpn_mul_basecase (ws + n, p, n22, p + n22, n22);
  mpn_mul_basecase (p, a, n22, b, n22);
  mpn_mul_basecase (p + n2, a + n22, n22, b + n22, n22);

  if (sign2) w = mpn_addadd_n(ws + n, p + n2, p, ws + n, n2);
  else w = mpn_addsub_n(ws + n, p + n2, p, ws + n, n2);

  w += mpn_add_n (p + n22, p + n22, ws + n, n2);
  MPN_INCR_U (p + n22 + n2, 2 * n2 - (n22 + n2), w);

  /* Kara 3 */

  //mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);

  n23 = n2 >> 1;
  sign3 = 0;

  MPN_KARA_SUB(p + n, sign3, a + n2, n23);
  MPN_KARA_SUB(p + n + n23, sign3, b + n2, n23);

  mpn_mul_basecase (ws + n, p + n, n23, p + n + n23, n23);
  mpn_mul_basecase (p + n, a + n2, n23, b + n2, n23);
  mpn_mul_basecase (p + n + n2, a + n2 + n23, n23, b + n2 + n23, n23);

  if (sign3) w = mpn_addadd_n(ws + n, p + n + n2, p + n, ws + n, n2);
  else w = mpn_addsub_n(ws + n, p + n + n2, p + n, ws + n,
n2);

  w += mpn_add_n (p + n + n23, p + n + n23, ws + n, n2);
  MPN_INCR_U (p + n + n23 + n2, 2 * n2 - (n23 + n2), w);

  /* Interpolate. */

  if (sign) w = mpn_addadd_n(ws, p + n, p, ws, n);
  else w = mpn_addsub_n(ws, p + n, p, ws, n);

  w += mpn_add_n (p + n2, p + n2, ws, n);
  MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w);
}

Bill.
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"mpir-devel" group.
To post to this group, send email to mpir-devel@googlegroups.com
To unsubscribe from this group, send email to 
mpir-devel+unsubscr...@googlegroups.com
For more options, visit this group at 
http://groups.google.com/group/mpir-devel?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to