Paul Zimmermann <paul.zimmerm...@inria.fr> writes:

> Dear Niels,
>
> ./speed -p 1000000 -c -s 10-200 -f1.1 mpn_mul.0.6 would be more readable,
> although the change in speed.h would be larger.

See the patch below, which adds support for mpn_mul/0.6. Example output:

  $ ./speed -c -r -s 10-50 -t 5 -p 1000000 mpn_mul mpn_mul/0.6
  overhead 5.00 cycles, precision 1000000 units of 1.25e-09 secs, CPU freq 798.28 MHz
              mpn_mul   mpn_mul/0.6
  10           215.48       #0.6295
  15           443.16       #0.6066
  20           784.50       #0.6138
  25          1209.57       #0.5957
  30          1490.59       #0.6934
  35          1986.51       #0.6918
  40          2547.99       #0.6981
  45          3189.27       #0.6633
  50          3827.87       #0.6734

What do you think? (Also deleted FLAG_RSIZE, which appeared unused.)

(One potential bug is the missing initialization of the new .size_ratio in
struct choice_t to 0.0, but I don't see any initialization of .r either, so
I'm probably missing something.)

> Or maybe ./speed -p 1000000 -c -s 10-200 -f1.1 -r 0.6 mpn_mul ?

I think it's nicer to be able to specify it separately for each function
under test.

Regards,
/Niels

diff -r 510152c4ca97 tune/speed.c --- a/tune/speed.c Tue Aug 22 10:20:40 2023 +0200 +++ b/tune/speed.c Mon Sep 18 18:10:04 2023 +0200 @@ -130,7 +130,7 @@ #define FLAG_R (1<<0) /* require ".r" */ #define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ -#define FLAG_RSIZE (1<<2) +#define FLAG_SR_OPTIONAL (1<<2) /* optional ".r" or "/r" */ #define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ const struct routine_t { @@ -328,8 +328,8 @@ { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 }, - { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, - { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, + { "mpn_mul", speed_mpn_mul, FLAG_SR_OPTIONAL }, + { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_SR_OPTIONAL }, { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, #if HAVE_NATIVE_mpn_sqr_diagonal { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, @@ -346,22 +346,22 @@ { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, - { "mpn_toom22_mul", speed_mpn_toom22_mul }, - { "mpn_toom33_mul", speed_mpn_toom33_mul }, - { "mpn_toom44_mul", speed_mpn_toom44_mul }, - { "mpn_toom6h_mul", 
speed_mpn_toom6h_mul }, - { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, - { "mpn_toom32_mul", speed_mpn_toom32_mul }, - { "mpn_toom42_mul", speed_mpn_toom42_mul }, - { "mpn_toom43_mul", speed_mpn_toom43_mul }, - { "mpn_toom63_mul", speed_mpn_toom63_mul }, - { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul }, + { "mpn_toom22_mul", speed_mpn_toom22_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom33_mul", speed_mpn_toom33_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom44_mul", speed_mpn_toom44_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom6h_mul", speed_mpn_toom6h_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom8h_mul", speed_mpn_toom8h_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom32_mul", speed_mpn_toom32_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom42_mul", speed_mpn_toom42_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom43_mul", speed_mpn_toom43_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom63_mul", speed_mpn_toom63_mul, FLAG_SR_OPTIONAL }, + { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul, FLAG_SR_OPTIONAL}, { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, #if WANT_OLD_FFT_FULL - { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, + { "mpn_mul_fft_full", speed_mpn_mul_fft_full, FLAG_SR_OPTIONAL}, { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, #endif - { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, + { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_SR_OPTIONAL }, { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, { "mpn_sqrlo", speed_mpn_sqrlo }, @@ -576,6 +576,7 @@ struct choice_t { const struct routine_t *p; mp_limb_t r; + double size_ratio; double scale; double time; int no_time; @@ -670,6 +671,7 @@ for (i = 0; i < num_choices; i++) { s->r = choice[i].r; + s->size_ratio = choice[i].size_ratio; choice[i].time = speed_measure (choice[i].p->fun, s); choice[i].no_time = (choice[i].time == -1.0); if (! 
choice[i].no_time) @@ -1011,6 +1013,17 @@ return n; } +double slash_r_string (const char *s) +{ + char *end; + double r = strtod(s, &end); + if (s[0] == '\0' || end[0] != '\0' || r > 1.0 || r < 0.0) + { + fprintf (stderr, "invalid /r parameter: %s\n", s); + exit (1); + } + return r; +} void routine_find (struct choice_t *c, const char *s_orig) @@ -1054,6 +1067,22 @@ c->r = r_string (s + nlen + 1); return; } + if (s[nlen] == '/') + { + /* match, with a /r parameter */ + + if (! (routine[i].flag & (FLAG_SR_OPTIONAL))) + { + fprintf (stderr, + "Choice %s bad: doesn't take a \"/<r>\" parameter\n", + s_orig); + exit (1); + } + + c->p = &routine[i]; + c->size_ratio = slash_r_string (s + nlen + 1); + return; + } if (s[nlen] == '\0') { @@ -1125,6 +1154,8 @@ printf ("\t%s.r\n", routine[i].name); else if (routine[i].flag & FLAG_R_OPTIONAL) printf ("\t%s (optional .r)\n", routine[i].name); + else if (routine[i].flag & FLAG_SR_OPTIONAL) + printf ("\t%s (optional .r or /r)\n", routine[i].name); else printf ("\t%s\n", routine[i].name); } diff -r 510152c4ca97 tune/speed.h --- a/tune/speed.h Tue Aug 22 10:20:40 2023 +0200 +++ b/tune/speed.h Mon Sep 18 18:10:04 2023 +0200 @@ -113,6 +113,7 @@ mp_ptr yp; /* second argument */ mp_size_t size; /* size of both arguments */ mp_limb_t r; /* user supplied parameter */ + double size_ratio; /* ratio for smaller to larger size, e.g., for mpn_mul */ mp_size_t align_xp; /* alignment of xp */ mp_size_t align_yp; /* alignment of yp */ mp_size_t align_wp; /* intended alignment of wp */ @@ -1122,9 +1123,13 @@ double t; \ TMP_DECL; \ \ - size1 = (s->r == 0 ? s->size : s->r); \ - if (size1 < 0) size1 = -size1 - s->size; \ - \ + if (s->size_ratio > 0.0) \ + size1 = s->size_ratio * s->size; \ + else \ + { \ + size1 = (s->r == 0 ? s->size : s->r); \ + if (size1 < 0) size1 = -size1 - s->size; \ + } \ SPEED_RESTRICT_COND (size1 >= 1); \ SPEED_RESTRICT_COND (s->size >= size1); \ \ -- Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677. 
Internet email is subject to wholesale government surveillance.
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel