Here are the results for a Bulldozer CPU (combinat on the Sage cluster). I removed x86_64/sqr_basecase.asm, which was the assembly file used. Note that without it, the speed executable segfaults...
################################################################################ # With assembly file ################################################################################ jpflori@combinat:~/build/mpir/tune$ ./speed -s 1-100 -t 1 mpn_sqr_basecase overhead 0.000000002 secs, precision 1000000 units of 4.35e-10 secs, CPU freq 2300.19 MHz mpn_sqr_basecase 1 0.000000005 2 0.000000009 3 0.000000014 4 0.000000028 5 0.000000038 6 0.000000052 7 0.000000065 8 0.000000082 9 0.000000098 10 0.000000118 11 0.000000140 12 0.000000160 13 0.000000185 14 0.000000216 15 0.000000233 16 0.000000285 17 0.000000311 18 0.000000339 19 0.000000376 20 0.000000427 21 0.000000457 22 0.000000507 23 0.000000550 24 0.000000594 25 0.000000620 26 0.000000704 27 0.000000764 28 0.000000816 29 0.000000890 30 0.000000962 31 0.000000993 32 0.000001096 33 0.000001119 34 0.000001199 35 0.000001262 36 0.000001330 37 0.000001413 38 0.000001521 39 0.000001572 40 0.000001620 41 0.000001731 42 0.000001827 43 0.000001891 44 0.000001972 45 0.000002099 46 0.000002192 47 0.000002264 48 0.000002393 49 0.000002494 50 0.000002572 51 0.000002682 52 0.000002784 53 0.000002873 54 0.000002950 55 0.000003086 56 0.000003208 57 0.000003314 58 0.000003470 59 0.000003597 60 0.000003734 61 0.000003824 62 0.000003925 63 0.000004042 64 0.000004244 65 0.000004308 66 0.000004404 67 0.000004577 68 0.000004737 69 0.000004907 70 0.000004991 71 0.000005139 72 0.000005274 73 0.000005442 74 0.000005549 75 0.000005704 76 0.000005921 77 0.000006020 78 0.000006226 79 0.000006342 80 0.000006742 81 0.000006855 82 0.000007019 83 0.000007181 84 0.000007380 85 0.000007529 86 0.000007674 87 0.000007733 88 0.000008123 89 0.000008130 90 0.000008421 91 0.000008468 92 0.000008748 93 0.000008864 94 0.000009011 95 0.000009163 96 0.000009462 97 0.000009550 98 0.000009791 99 0.000009950 100 0.000010140 ./tuneup Parameters for ./mpn/x86_64/k8/k10/k102/gmp-mparam.h Using: CPU cycle counter, supplemented by microsecond getrusage() 
speed_precision 1000000, speed_unittime 4.35e-10 secs, CPU freq 2300.19 MHz DEFAULT_MAX_SIZE 1000, fft_max_size 50000 /* Generated by tuneup.c, 2014-02-17, gcc 4.6 */ #define MUL_KARATSUBA_THRESHOLD 14 #define MUL_TOOM3_THRESHOLD 89 #define MUL_TOOM4_THRESHOLD 238 #define MUL_TOOM8H_THRESHOLD 351 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 20 #define SQR_TOOM3_THRESHOLD 122 #define SQR_TOOM4_THRESHOLD 680 #define SQR_TOOM8_THRESHOLD 680 #define POWM_THRESHOLD 128 #define HGCD_THRESHOLD 91 #define GCD_DC_THRESHOLD 418 #define GCDEXT_DC_THRESHOLD 351 #define JACOBI_BASE_METHOD 1 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ #define USE_PREINV_DIVREM_1 1 /* native */ #define USE_PREINV_MOD_1 1 #define DIVEXACT_1_THRESHOLD 0 /* always */ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ #define MOD_1_1_THRESHOLD 7 #define MOD_1_2_THRESHOLD 7 #define MOD_1_3_THRESHOLD 20 #define DIVREM_HENSEL_QR_1_THRESHOLD 13 #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 13 #define DIVREM_EUCLID_HENSEL_THRESHOLD 24 #define ROOTREM_THRESHOLD 6 #define GET_STR_DC_THRESHOLD 12 #define GET_STR_PRECOMPUTE_THRESHOLD 19 #define SET_STR_DC_THRESHOLD 454 #define SET_STR_PRECOMPUTE_THRESHOLD 538 #define MUL_FFT_FULL_THRESHOLD 3904 #define SQR_FFT_FULL_THRESHOLD 3392 #define MULLOW_BASECASE_THRESHOLD 0 /* always */ #define MULLOW_DC_THRESHOLD 33 #define MULLOW_MUL_THRESHOLD 5565 #define MULHIGH_BASECASE_THRESHOLD 7 #define MULHIGH_DC_THRESHOLD 29 #define MULHIGH_MUL_THRESHOLD 3336 #define MULMOD_2EXPM1_THRESHOLD 14 #define FAC_UI_THRESHOLD 8671 #define DC_DIV_QR_THRESHOLD 52 #define DC_DIVAPPR_Q_N_THRESHOLD 12 #define INV_DIV_QR_THRESHOLD 2130 #define INV_DIVAPPR_Q_N_THRESHOLD 12 #define DC_DIV_Q_THRESHOLD 122 #define INV_DIV_Q_THRESHOLD 6039 #define DC_DIVAPPR_Q_THRESHOLD 41 #define 
INV_DIVAPPR_Q_THRESHOLD 16039 #define DC_BDIV_QR_THRESHOLD 54 #define DC_BDIV_Q_THRESHOLD 35 /* fft_tuning -- autogenerated by tune-fft */ #define FFT_TAB \ { { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } } #define MULMOD_TAB \ { 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1 } #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 256 /* Tuneup completed successfully, took 166 seconds */ ################################################################################ # Without assembly file ################################################################################ jpflori@combinat:~/build/mpir/tune$ ./speed -s 1-100 -t 1 mpn_sqr_basecase overhead 0.000000002 secs, precision 1000000 units of 4.35e-10 secs, CPU freq 2300.19 MHz mpn_sqr_basecase 1 0.000000008 2 0.000000016 3 0.000000046 4 0.000000061 5 0.000000079 6 0.000000097 7 0.000000125 8 0.000000144 9 0.000000171 10 0.000000189 11 0.000000237 12 0.000000262 13 0.000000293 14 0.000000335 15 0.000000368 16 0.000000395 17 0.000000468 18 0.000000487 19 0.000000545 20 0.000000580 21 0.000000663 22 0.000000684 23 0.000000741 24 0.000000805 25 0.000000893 26 0.000000888 27 0.000001022 28 0.000001021 29 0.000001138 30 0.000001152 31 0.000001260 Segmentation fault (core dumped) -- You received this message because you are subscribed to the Google Groups "mpir-devel" group. To unsubscribe from this group and stop receiving emails from it, send an email to mpir-devel+unsubscr...@googlegroups.com. To post to this group, send email to mpir-devel@googlegroups.com. Visit this group at http://groups.google.com/group/mpir-devel. For more options, visit https://groups.google.com/groups/opt_out.