Torbjörn Granlund <t...@gmplib.org> writes:
> I can confirm that the tuneup program now works. (It took a few days
> extra as a FreeBSD upgrade had broken the autotool.)
I'd like to get the changes back in, piece by piece... The below patch is the change to add /r syntax, and enable it only for mpn_mul and mpn_mul_basecase. Seems to work for me, tested by running ./tuneup and ./speed -r -s 10-500 -f 1.2 -C mpn_mul mpn_mul/0.7 mpn_mul/0.4 Does that look reasonable? Regards, /Niels diff -r c678b487c101 tune/speed.c --- a/tune/speed.c Sun Oct 22 17:04:29 2023 +0200 +++ b/tune/speed.c Wed Nov 15 15:01:08 2023 +0100 @@ -130,7 +130,7 @@ #define FLAG_R (1<<0) /* require ".r" */ #define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ -#define FLAG_RSIZE (1<<2) +#define FLAG_SR_OPTIONAL (1<<2) /* optional ".r" or "/r" */ #define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ const struct routine_t { @@ -328,8 +328,8 @@ { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 }, - { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, - { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, + { "mpn_mul", speed_mpn_mul, FLAG_SR_OPTIONAL }, + { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_SR_OPTIONAL }, { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, #if HAVE_NATIVE_mpn_sqr_diagonal { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, @@ -576,6 +576,7 @@ struct choice_t { const struct routine_t *p; mp_limb_t r; + double size_ratio; double scale; double time; int no_time; @@ -670,6 +671,7 @@ for (i = 0; i < num_choices; i++) { s->r = choice[i].r; + s->size_ratio = choice[i].size_ratio; choice[i].time = speed_measure (choice[i].p->fun, s); choice[i].no_time = (choice[i].time == -1.0); if (! choice[i].no_time) @@ -1011,6 +1013,17 @@ return n; } +double slash_r_string (const char *s) +{ + char *end; + double r = strtod(s, &end); + if (s[0] == '\0' || end[0] != '\0' || r > 1.0 || r < 0.0) + { + fprintf (stderr, "invalid /r parameter: %s\n", s); + exit (1); + } + return r; +} void routine_find (struct choice_t *c, const char *s_orig) @@ -1042,7 +1055,7 @@ { /* match, with a .r parameter */ - if (! 
(routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) + if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL|FLAG_SR_OPTIONAL))) { fprintf (stderr, "Choice %s bad: doesn't take a \".<r>\" parameter\n", @@ -1054,6 +1067,22 @@ c->r = r_string (s + nlen + 1); return; } + if (s[nlen] == '/') + { + /* match, with a /r parameter */ + + if (! (routine[i].flag & (FLAG_SR_OPTIONAL))) + { + fprintf (stderr, + "Choice %s bad: doesn't take a \"/<r>\" parameter\n", + s_orig); + exit (1); + } + + c->p = &routine[i]; + c->size_ratio = slash_r_string (s + nlen + 1); + return; + } if (s[nlen] == '\0') { @@ -1125,6 +1154,8 @@ printf ("\t%s.r\n", routine[i].name); else if (routine[i].flag & FLAG_R_OPTIONAL) printf ("\t%s (optional .r)\n", routine[i].name); + else if (routine[i].flag & FLAG_SR_OPTIONAL) + printf ("\t%s (optional .r or /r)\n", routine[i].name); else printf ("\t%s\n", routine[i].name); } @@ -1135,6 +1166,8 @@ printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n"); printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); printf ("\n"); + printf ("Routines with an optional \"/r\" take a decimal ratio, for example mpn_mul/0.7.\n"); + printf ("\n"); printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); printf ("The fastest routine at each size is marked with a # (free form output only).\n"); printf ("\n"); diff -r c678b487c101 tune/speed.h --- a/tune/speed.h Sun Oct 22 17:04:29 2023 +0200 +++ b/tune/speed.h Wed Nov 15 15:01:08 2023 +0100 @@ -113,6 +113,7 @@ mp_ptr yp; /* second argument */ mp_size_t size; /* size of both arguments */ mp_limb_t r; /* user supplied parameter */ + double size_ratio; /* ratio for smaller to larger size, e.g., for mpn_mul */ mp_size_t align_xp; /* alignment of xp */ mp_size_t align_yp; /* alignment of yp */ mp_size_t align_wp; /* intended alignment of wp */ @@ -1122,9 +1123,12 @@ double t; \ TMP_DECL; \ \ - size1 = (s->r == 0 ? 
s->size : s->r); \ - if (size1 < 0) size1 = -size1 - s->size; \ - \ + size1 = s->size_ratio * s->size; \ + if (size1 == 0) \ + { \ + size1 = (s->r == 0 ? s->size : s->r); \ + if (size1 < 0) size1 = -size1 - s->size; \ + } \ SPEED_RESTRICT_COND (size1 >= 1); \ SPEED_RESTRICT_COND (s->size >= size1); \ \ diff -r c678b487c101 tune/tuneup.c --- a/tune/tuneup.c Sun Oct 22 17:04:29 2023 +0200 +++ b/tune/tuneup.c Wed Nov 15 15:01:08 2023 +0100 @@ -2948,6 +2948,7 @@ #endif } printf ("\n"); + s.size_ratio = 0.0; tune_divrem_1 (); tune_mod_1 (); -- Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel