Niels Möller <ni...@lysator.liu.se> writes: > And now I found that speed's handling of toom22, toom32 and friends > always use fixed ratios, so I have to extend that to be able to get any > interesting benchmarks when varying the ratio.
Below patch seems to work. Most subtle part was to specify reasonable ratio limits for the various toom functions. Tested by running ./speed -c -s100-200 mpn_mul_basecase.-200 mpn_toom22_mul.-200 mpn_toom33_mul.-200 \ mpn_toom44_mul.-200 mpn_toom6h_mul.-200 mpn_toom8h_mul.-200 \ mpn_toom32_mul.-200 mpn_toom43_mul.-200 mpn_toom63_mul.-200 in a build with asserts enabled. I intend to push this later today. As a followup, it might be possible to delete the SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL and similar, instead setting s->size_ratio and using the plain measurement functions. Regards, /Niels diff -r 2d0b13e266c7 tune/common.c --- a/tune/common.c Wed Sep 27 08:55:14 2023 +0200 +++ b/tune/common.c Wed Sep 27 12:37:23 2023 +0200 @@ -1347,27 +1347,27 @@ double speed_mpn_toom22_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul); + SPEED_ROUTINE_MPN_TOOM22_MUL (mpn_toom22_mul); } double speed_mpn_toom33_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul); + SPEED_ROUTINE_MPN_TOOM33_MUL (mpn_toom33_mul); } double speed_mpn_toom44_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul); + SPEED_ROUTINE_MPN_TOOM44_MUL (mpn_toom44_mul); } double speed_mpn_toom6h_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul); + SPEED_ROUTINE_MPN_TOOM6H_MUL (mpn_toom6h_mul); } double speed_mpn_toom8h_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul); + SPEED_ROUTINE_MPN_TOOM8H_MUL (mpn_toom8h_mul); } double diff -r 2d0b13e266c7 tune/speed.h --- a/tune/speed.h Wed Sep 27 08:55:14 2023 +0200 +++ b/tune/speed.h Wed Sep 27 12:37:23 2023 +0200 @@ -1474,6 +1474,47 @@ return t; \ } +#define SPEED_ROUTINE_MPN_MUL_TSPACE(function, itch, default_bn, valid) \ + { \ + mp_ptr wp, tspace; \ + mp_size_t an, bn, tn; \ + unsigned i; \ + double t; \ + TMP_DECL; \ + \ + an = s->size; \ + bn = s->size_ratio * s->size; \ + if (bn == 0) \ + { \ + bn = (s->r == 0 ? default_bn : s->r); \ + if (bn < 0) bn = -bn - an; \ + } \ + SPEED_RESTRICT_COND (bn >= 1); \ + SPEED_RESTRICT_COND (an >= bn); \ + SPEED_RESTRICT_COND (valid); \ + tn = itch(an, bn); \ + \ + TMP_MARK; \ + SPEED_TMP_ALLOC_LIMBS (wp, an + bn, s->align_wp); \ + SPEED_TMP_ALLOC_LIMBS (tspace, tn, s->align_wp2); \ + \ + speed_operand_src (s, s->xp, an); \ + speed_operand_src (s, s->yp, bn); \ + speed_operand_dst (s, wp, an + bn); \ + speed_operand_dst (s, tspace, tn); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + function(wp, s->xp, an, s->yp, bn, tspace); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + TMP_FREE; \ + return t; \ + } + #define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \ { \ mp_ptr wp, tspace; \ @@ -1504,59 +1545,50 @@ return t; \ } -#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom22_mul_itch (s->size, s->size), \ - MPN_TOOM22_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom33_mul_itch (s->size, s->size), \ - MPN_TOOM33_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom44_mul_itch (s->size, s->size), \ - MPN_TOOM44_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom6h_mul_itch (s->size, s->size), \ - MPN_TOOM6H_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom8h_mul_itch (s->size, s->size), \ - MPN_TOOM8H_MUL_MINSIZE) +#define SPEED_ROUTINE_MPN_TOOM22_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom22_mul_itch, \ + an, 5*bn > 4*an) + +#define SPEED_ROUTINE_MPN_TOOM33_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom33_mul_itch, \ + an, bn > 2 * ((an+2) / 3)) + +#define SPEED_ROUTINE_MPN_TOOM44_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom44_mul_itch, \ + an, bn > 3*((an + 3) >> 2)) + +#define SPEED_ROUTINE_MPN_TOOM6H_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom6h_mul_itch, \ + an, bn >= 42 && ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17))) + +#define SPEED_ROUTINE_MPN_TOOM8H_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom8h_mul_itch, \ + an, (bn >= 86) && an*4 <= bn*11) #define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \ - mpn_toom32_mul_itch (s->size, 2*s->size/3), \ - MPN_TOOM32_MUL_MINSIZE) + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom32_mul_itch, \ + 2*an / 3, bn + 2 <= an && an + 6 <= 3*bn) #define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \ - mpn_toom42_mul_itch (s->size, s->size/2), \ - MPN_TOOM42_MUL_MINSIZE) + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom42_mul_itch, \ + an / 2, an >= 7 && bn >= 2 && an > 3*((bn+1)/2) && bn > ((an+3)/4)) #define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace), \ - mpn_toom43_mul_itch (s->size, s->size*3/4), \ - MPN_TOOM43_MUL_MINSIZE) + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom43_mul_itch, \ + an*3/4, an >= 7 && bn >= 5 && an > 3 * ((bn+2)/3) && bn > 2 * ((an+3)/4)) #define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \ - mpn_toom63_mul_itch (s->size, s->size/2), \ - MPN_TOOM63_MUL_MINSIZE) + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom63_mul_itch, \ + an/2, an >= 26 && bn >= 5 && an > 5*((bn+2)/3) && bn > 2*((an+5)/6)) #define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \ SPEED_ROUTINE_MPN_MUL_N_TSPACE \ > Regards, > /Niels -- Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel