I had a play with the karatsuba function today. I figured that one might be able to cut out some overhead by having it complete the last two iterations of the recursion in one go. I wrote some code which works for the last two iterations when n/2, at that point, is divisible by 4. But after fiddling for quite some time to try and find a way to make the existing code faster with this as a special case, I have not seen any speedups. Anyhow, here is the code, in case someone else had some ideas. It obviously only works on systems with mpn_addadd_n and mpn_addsub_n.
#define MPN_KARA_SUB(pxxx, signxxx, axxx, n2xxx) \ do \ { \ i = n2xxx; \ do \ { \ --i; \ w0 = (axxx)[i]; \ w1 = (axxx)[n2xxx + i]; \ } while (w0 == w1 && i != 0); \ if (w0 < w1) \ { \ x = axxx + n2xxx; \ y = axxx; \ signxxx = ~signxxx; \ } else \ { \ x = axxx; \ y = axxx + n2xxx; \ } \ mpn_sub_n (pxxx, x, y, n2xxx); \ } while (0) void mpn_kara_mul_4n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws) { /* main vars */ mp_limb_t w, w0, w1; mp_size_t n2; mp_srcptr x, y; mp_size_t i; int sign, ci; /* kara 1 vars */ mp_size_t n21; int sign1; /* kara 2 vars */ mp_size_t n22; int sign2; /* kara 3 vars */ mp_size_t n23; int sign3; n2 = n >> 1; sign = 0; MPN_KARA_SUB(p, sign, a, n2); MPN_KARA_SUB(p + n2, sign, b, n2); /* Kara 1 */ //mpn_mul_basecase (ws, p, n2, p + n2, n2); n21 = n2 >> 1; sign1 = 0; MPN_KARA_SUB(ws, sign1, p, n21); MPN_KARA_SUB(ws + n21, sign1, p + n2, n21); mpn_mul_basecase (ws + n, ws, n21, ws + n21, n21); mpn_mul_basecase (ws, p, n21, p + n2, n21); mpn_mul_basecase (ws + n2, p + n21, n21, p + n2 + n21, n21); if (sign1) w = mpn_addadd_n(ws + n, ws + n2, ws, ws + n, n2); else w = mpn_addsub_n(ws + n, ws + n2, ws, ws + n, n2); w += mpn_add_n (ws + n21, ws + n21, ws + n, n2); MPN_INCR_U (ws + n21 + n2, 2 * n2 - (n21 + n2), w); /* Kara 2 */ //mpn_mul_basecase (p, a, n2, b, n2); n22 = n2 >> 1; sign2 = 0; MPN_KARA_SUB(p, sign2, a, n22); MPN_KARA_SUB(p + n22, sign2, b, n22); mpn_mul_basecase (ws + n, p, n22, p + n22, n22); mpn_mul_basecase (p, a, n22, b, n22); mpn_mul_basecase (p + n2, a + n22, n22, b + n22, n22); if (sign2) w = mpn_addadd_n(ws + n, p + n2, p, ws + n, n2); else w = mpn_addsub_n(ws + n, p + n2, p, ws + n, n2); w += mpn_add_n (p + n22, p + n22, ws + n, n2); MPN_INCR_U (p + n22 + n2, 2 * n2 - (n22 + n2), w); /* Kara 3 */ //mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2); n23 = n2 >> 1; sign3 = 0; MPN_KARA_SUB(p + n, sign3, a + n2, n23); MPN_KARA_SUB(p + n + n23, sign3, b + n2, n23); mpn_mul_basecase (ws + n, p + n, n23, p + n + n23, n23); mpn_mul_basecase (p + n, a + n2, n23, b + n2, n23); mpn_mul_basecase (p + n + n2, a + n2 + n23, n23, b + n2 + n23, n23); if (sign3) w = mpn_addadd_n(ws + n, p + n + n2, p + n, ws + n, n2); else w = mpn_addsub_n(ws + n, p + n + n2, p + n, ws + n, n2); w += mpn_add_n (p + n + n23, p + n + n23, ws + n, n2); MPN_INCR_U (p + n + n23 + n2, 2 * n2 - (n23 + n2), w); /* Interpolate. */ if (sign) w = mpn_addadd_n(ws, p + n, p, ws, n); else w = mpn_addsub_n(ws, p + n, p, ws, n); w += mpn_add_n (p + n2, p + n2, ws, n); MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w); } Bill. --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "mpir-devel" group. To post to this group, send email to mpir-devel@googlegroups.com To unsubscribe from this group, send email to mpir-devel+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/mpir-devel?hl=en -~----------~----~----~----~------~----~------~--~---