On Wednesday, October 24, 2012 10:58:05 AM UTC+2, Jean-Pierre Flori wrote: > > [jp@jp-x220]% uname -a > Linux jp-x220 3.5-trunk-amd64 #1 SMP Debian 3.5.5-1~experimental.1 x86_64 > GNU/Linux > > [jp@jp-x220]% cat /proc/cpuinfo > processor : 0 > vendor_id : GenuineIntel > cpu family : 6 > model : 42 > model name : Intel(R) Core(TM) i7-2620M CPU @ 2.70GHz > ... > > [jp@jp-x220]% gcc -v > Using built-in specs. > COLLECT_GCC=gcc > COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.7/lto-wrapper > Target: x86_64-linux-gnu > Configured with: ../src/configure -v --with-pkgversion='Debian 4.7.2-4' > --with-bugurl=file:///usr/share/doc/gcc-4.7/README.Bugs > --enable-languages=c,c++,go,fortran,objc,obj-c++ --prefix=/usr > --program-suffix=-4.7 --enable-shared --enable-linker-build-id > --with-system-zlib --libexecdir=/usr/lib --without-included-gettext > --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.7 > --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu > --enable-libstdcxx-debug --enable-libstdcxx-time=yes > --enable-gnu-unique-object --enable-plugin --enable-objc-gc > --with-arch-32=i586 --with-tune=generic --enable-checking=release > --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu > Thread model: posix > gcc version 4.7.2 (Debian 4.7.2-4) > > [jp@jp-x220]% ./configure --prefix=$LOCAL --enable-gmpcompat --enable-cxx > > [jp@jp-x220]% ./config.guess > sandybridge-unknown-linux-gnu > > [jp@jp-x220]% make tune > ... 
> Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h > Using: CPU cycle counter, supplemented by microsecond getrusage() > speed_precision 1000000, speed_unittime 1.25e-09 secs, CPU freq 800.00 MHz > DEFAULT_MAX_SIZE 1000, fft_max_size 50000 > > /* Generated by tuneup.c, 2012-10-24, gcc 4.7 */ > > #define MUL_KARATSUBA_THRESHOLD 16 > #define MUL_TOOM3_THRESHOLD 105 > #define MUL_TOOM4_THRESHOLD 244 > #define MUL_TOOM8H_THRESHOLD 327 > > #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ > #define SQR_KARATSUBA_THRESHOLD 31 > #define SQR_TOOM3_THRESHOLD 101 > #define SQR_TOOM4_THRESHOLD 256 > #define SQR_TOOM8_THRESHOLD 333 > > #define POWM_THRESHOLD 138 > > #define HGCD_THRESHOLD 75 > #define GCD_DC_THRESHOLD 2797 > #define GCDEXT_DC_THRESHOLD 1788 > #define JACOBI_BASE_METHOD 1 > > #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ > #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ > #define MOD_1_NORM_THRESHOLD 0 /* always */ > #define MOD_1_UNNORM_THRESHOLD 0 /* always */ > #define USE_PREINV_DIVREM_1 1 /* native */ > #define USE_PREINV_MOD_1 1 > #define DIVEXACT_1_THRESHOLD 0 /* always */ > #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ > #define MOD_1_1_THRESHOLD 7 > #define MOD_1_2_THRESHOLD 7 > #define MOD_1_3_THRESHOLD 23 > #define DIVREM_HENSEL_QR_1_THRESHOLD 29 > #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 5 > #define DIVREM_EUCLID_HENSEL_THRESHOLD 146 > > #define ROOTREM_THRESHOLD 6 > > #define GET_STR_DC_THRESHOLD 17 > #define GET_STR_PRECOMPUTE_THRESHOLD 23 > #define SET_STR_DC_THRESHOLD 6915 > #define SET_STR_PRECOMPUTE_THRESHOLD 7939 > > #define MUL_FFT_FULL_THRESHOLD 3008 > > #define SQR_FFT_FULL_THRESHOLD 3520 > > #define MULLOW_BASECASE_THRESHOLD 7 > #define MULLOW_DC_THRESHOLD 30 > #define MULLOW_MUL_THRESHOLD 4525 > > #define MULHIGH_BASECASE_THRESHOLD 10 > #define MULHIGH_DC_THRESHOLD 27 > #define MULHIGH_MUL_THRESHOLD 2966 > > #define MULMOD_2EXPM1_THRESHOLD 20 > > #define FAC_UI_THRESHOLD 1590 > #define 
DC_DIV_QR_THRESHOLD 100 > #define DC_DIVAPPR_Q_N_THRESHOLD 90 > #define INV_DIV_QR_THRESHOLD 465 > #define INV_DIVAPPR_Q_N_THRESHOLD 90 > #define DC_DIV_Q_THRESHOLD 136 > #define INV_DIV_Q_THRESHOLD 5581 > #define DC_DIVAPPR_Q_THRESHOLD 100 > #define INV_DIVAPPR_Q_THRESHOLD 12502 > #define DC_BDIV_QR_THRESHOLD 100 > #define DC_BDIV_Q_THRESHOLD 44 > > /* fft_tuning -- autogenerated by tune-fft */ > > #define FFT_TAB \ > { { 4, 3 }, { 3, 2 }, { 3, 2 }, { 2, 1 }, { 1, 0 } } > > #define MULMOD_TAB \ > { 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1 } > > #define FFT_N_NUM 19 > > #define FFT_MULMOD_2EXPP1_CUTOFF 128 > > > /* Tuneup completed successfully, took 125 seconds */ > > > There might have been some problems with CPU throttling above (look at the 800MHz in the make tune output). Here is what I get when setting the cpufreq governor to performance (i.e. 2.7GHz).
[jp@jp-x220]% make tune ... ./tuneup Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h Using: CPU cycle counter, supplemented by microsecond getrusage() speed_precision 1000000, speed_unittime 3.70e-10 secs, CPU freq 2701.00 MHz DEFAULT_MAX_SIZE 1000, fft_max_size 50000 /* Generated by tuneup.c, 2012-10-24, gcc 4.7 */ #define MUL_KARATSUBA_THRESHOLD 16 #define MUL_TOOM3_THRESHOLD 105 #define MUL_TOOM4_THRESHOLD 246 #define MUL_TOOM8H_THRESHOLD 327 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 31 #define SQR_TOOM3_THRESHOLD 61 #define SQR_TOOM4_THRESHOLD 178 #define SQR_TOOM8_THRESHOLD 240 #define POWM_THRESHOLD 138 #define HGCD_THRESHOLD 42 #define GCD_DC_THRESHOLD 2770 #define GCDEXT_DC_THRESHOLD 1788 #define JACOBI_BASE_METHOD 1 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ #define USE_PREINV_DIVREM_1 1 /* native */ #define USE_PREINV_MOD_1 1 #define DIVEXACT_1_THRESHOLD 0 /* always */ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ #define MOD_1_1_THRESHOLD 7 #define MOD_1_2_THRESHOLD 7 #define MOD_1_3_THRESHOLD 23 #define DIVREM_HENSEL_QR_1_THRESHOLD 31 #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 5 #define DIVREM_EUCLID_HENSEL_THRESHOLD 15 #define ROOTREM_THRESHOLD 6 #define GET_STR_DC_THRESHOLD 16 #define GET_STR_PRECOMPUTE_THRESHOLD 23 #define SET_STR_DC_THRESHOLD 6915 #define SET_STR_PRECOMPUTE_THRESHOLD 6915 #define MUL_FFT_FULL_THRESHOLD 3008 #define SQR_FFT_FULL_THRESHOLD 3520 #define MULLOW_BASECASE_THRESHOLD 7 #define MULLOW_DC_THRESHOLD 30 #define MULLOW_MUL_THRESHOLD 4570 #define MULHIGH_BASECASE_THRESHOLD 10 #define MULHIGH_DC_THRESHOLD 27 #define MULHIGH_MUL_THRESHOLD 2966 #define MULMOD_2EXPM1_THRESHOLD 20 #define FAC_UI_THRESHOLD 1605 #define DC_DIV_QR_THRESHOLD 100 #define DC_DIVAPPR_Q_N_THRESHOLD 91 #define INV_DIV_QR_THRESHOLD 465 
#define INV_DIVAPPR_Q_N_THRESHOLD 91 #define DC_DIV_Q_THRESHOLD 130 #define INV_DIV_Q_THRESHOLD 5581 #define DC_DIVAPPR_Q_THRESHOLD 102 #define INV_DIVAPPR_Q_THRESHOLD 12637 #define DC_BDIV_QR_THRESHOLD 100 #define DC_BDIV_Q_THRESHOLD 42 /* fft_tuning -- autogenerated by tune-fft */ #define FFT_TAB \ { { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } } #define MULMOD_TAB \ { 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1 } #define FFT_N_NUM 23 #define FFT_MULMOD_2EXPP1_CUTOFF 128 /* Tuneup completed successfully, took 137 seconds */ These are slightly different, but not more than if I rerun make tune once more with the CPU stuck at max frequency, see below: [jp@jp-x220]% make tune ... ./tuneup Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h Using: CPU cycle counter, supplemented by microsecond getrusage() speed_precision 1000000, speed_unittime 3.70e-10 secs, CPU freq 2701.00 MHz DEFAULT_MAX_SIZE 1000, fft_max_size 50000 /* Generated by tuneup.c, 2012-10-24, gcc 4.7 */ #define MUL_KARATSUBA_THRESHOLD 16 #define MUL_TOOM3_THRESHOLD 105 #define MUL_TOOM4_THRESHOLD 244 #define MUL_TOOM8H_THRESHOLD 303 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 31 #define SQR_TOOM3_THRESHOLD 95 #define SQR_TOOM4_THRESHOLD 250 #define SQR_TOOM8_THRESHOLD 351 #define POWM_THRESHOLD 138 #define HGCD_THRESHOLD 37 #define GCD_DC_THRESHOLD 2587 #define GCDEXT_DC_THRESHOLD 1788 #define JACOBI_BASE_METHOD 1 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define MOD_1_NORM_THRESHOLD 0 /* always */ #define MOD_1_UNNORM_THRESHOLD 0 /* always */ #define USE_PREINV_DIVREM_1 1 /* native */ #define USE_PREINV_MOD_1 1 #define DIVEXACT_1_THRESHOLD 0 /* always */ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ #define MOD_1_1_THRESHOLD 7 #define MOD_1_2_THRESHOLD 7 #define MOD_1_3_THRESHOLD 23 #define DIVREM_HENSEL_QR_1_THRESHOLD 31 #define 
RSH_DIVREM_HENSEL_QR_1_THRESHOLD 5 #define DIVREM_EUCLID_HENSEL_THRESHOLD 121 #define ROOTREM_THRESHOLD 6 #define GET_STR_DC_THRESHOLD 17 #define GET_STR_PRECOMPUTE_THRESHOLD 23 #define SET_STR_DC_THRESHOLD 6915 #define SET_STR_PRECOMPUTE_THRESHOLD 8097 #define MUL_FFT_FULL_THRESHOLD 3008 #define SQR_FFT_FULL_THRESHOLD 3520 #define MULLOW_BASECASE_THRESHOLD 7 #define MULLOW_DC_THRESHOLD 30 #define MULLOW_MUL_THRESHOLD 4525 #define MULHIGH_BASECASE_THRESHOLD 10 #define MULHIGH_DC_THRESHOLD 30 #define MULHIGH_MUL_THRESHOLD 2966 #define MULMOD_2EXPM1_THRESHOLD 20 #define FAC_UI_THRESHOLD 1590 #define DC_DIV_QR_THRESHOLD 100 #define DC_DIVAPPR_Q_N_THRESHOLD 90 #define INV_DIV_QR_THRESHOLD 465 #define INV_DIVAPPR_Q_N_THRESHOLD 90 #define DC_DIV_Q_THRESHOLD 39 #define INV_DIV_Q_THRESHOLD 5581 #define DC_DIVAPPR_Q_THRESHOLD 104 #define INV_DIVAPPR_Q_THRESHOLD 14091 #define DC_BDIV_QR_THRESHOLD 100 #define DC_BDIV_Q_THRESHOLD 44 /* fft_tuning -- autogenerated by tune-fft */ #define FFT_TAB \ { { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } } #define MULMOD_TAB \ { 4, 3, 3, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1 } #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 /* Tuneup completed successfully, took 124 seconds */ -- You received this message because you are subscribed to the Google Groups "mpir-devel" group. To view this discussion on the web visit https://groups.google.com/d/msg/mpir-devel/-/jDgob3Czm8UJ. To post to this group, send email to mpir-devel@googlegroups.com. To unsubscribe from this group, send email to mpir-devel+unsubscr...@googlegroups.com. For more options, visit this group at http://groups.google.com/group/mpir-devel?hl=en.