On Wednesday, October 24, 2012 10:58:05 AM UTC+2, Jean-Pierre Flori wrote:
>
> [jp@jp-x220]% uname -a
> Linux jp-x220 3.5-trunk-amd64 #1 SMP Debian 3.5.5-1~experimental.1 x86_64 
> GNU/Linux
>
> [jp@jp-x220]% cat /proc/cpuinfo
> processor       : 0
> vendor_id       : GenuineIntel
> cpu family      : 6
> model           : 42
> model name      : Intel(R) Core(TM) i7-2620M CPU @ 2.70GHz
> ...
>
> [jp@jp-x220]% gcc -v 
> Using built-in specs.
> COLLECT_GCC=gcc
> COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.7/lto-wrapper
> Target: x86_64-linux-gnu
> Configured with: ../src/configure -v --with-pkgversion='Debian 4.7.2-4' 
> --with-bugurl=file:///usr/share/doc/gcc-4.7/README.Bugs 
> --enable-languages=c,c++,go,fortran,objc,obj-c++ --prefix=/usr 
> --program-suffix=-4.7 --enable-shared --enable-linker-build-id 
> --with-system-zlib --libexecdir=/usr/lib --without-included-gettext 
> --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.7 
> --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu 
> --enable-libstdcxx-debug --enable-libstdcxx-time=yes 
> --enable-gnu-unique-object --enable-plugin --enable-objc-gc 
> --with-arch-32=i586 --with-tune=generic --enable-checking=release 
> --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
> Thread model: posix
> gcc version 4.7.2 (Debian 4.7.2-4)
>
> [jp@jp-x220]% ./configure --prefix=$LOCAL --enable-gmpcompat --enable-cxx
>
> [jp@jp-x220]% ./config.guess
> sandybridge-unknown-linux-gnu
>
> [jp@jp-x220]% make tune
> ...
> Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h
> Using: CPU cycle counter, supplemented by microsecond getrusage()
> speed_precision 1000000, speed_unittime 1.25e-09 secs, CPU freq 800.00 MHz
> DEFAULT_MAX_SIZE 1000, fft_max_size 50000
>
> /* Generated by tuneup.c, 2012-10-24, gcc 4.7 */
>
> #define MUL_KARATSUBA_THRESHOLD          16
> #define MUL_TOOM3_THRESHOLD             105
> #define MUL_TOOM4_THRESHOLD             244
> #define MUL_TOOM8H_THRESHOLD            327
>
> #define SQR_BASECASE_THRESHOLD            0  /* always (native) */
> #define SQR_KARATSUBA_THRESHOLD          31
> #define SQR_TOOM3_THRESHOLD             101
> #define SQR_TOOM4_THRESHOLD             256
> #define SQR_TOOM8_THRESHOLD             333
>
> #define POWM_THRESHOLD                  138
>
> #define HGCD_THRESHOLD                   75
> #define GCD_DC_THRESHOLD               2797
> #define GCDEXT_DC_THRESHOLD            1788
> #define JACOBI_BASE_METHOD                1
>
> #define DIVREM_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* never */
> #define DIVREM_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* never */
> #define MOD_1_NORM_THRESHOLD              0  /* always */
> #define MOD_1_UNNORM_THRESHOLD            0  /* always */
> #define USE_PREINV_DIVREM_1               1  /* native */
> #define USE_PREINV_MOD_1                  1
> #define DIVEXACT_1_THRESHOLD              0  /* always */
> #define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
> #define MOD_1_1_THRESHOLD                 7
> #define MOD_1_2_THRESHOLD                 7
> #define MOD_1_3_THRESHOLD                23
> #define DIVREM_HENSEL_QR_1_THRESHOLD     29
> #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD      5
> #define DIVREM_EUCLID_HENSEL_THRESHOLD    146
>
> #define ROOTREM_THRESHOLD                 6
>
> #define GET_STR_DC_THRESHOLD             17
> #define GET_STR_PRECOMPUTE_THRESHOLD     23
> #define SET_STR_DC_THRESHOLD           6915
> #define SET_STR_PRECOMPUTE_THRESHOLD   7939
>
> #define MUL_FFT_FULL_THRESHOLD         3008
>
> #define SQR_FFT_FULL_THRESHOLD         3520
>
> #define MULLOW_BASECASE_THRESHOLD         7
> #define MULLOW_DC_THRESHOLD              30
> #define MULLOW_MUL_THRESHOLD           4525
>
> #define MULHIGH_BASECASE_THRESHOLD       10
> #define MULHIGH_DC_THRESHOLD             27
> #define MULHIGH_MUL_THRESHOLD          2966
>
> #define MULMOD_2EXPM1_THRESHOLD          20
>
> #define FAC_UI_THRESHOLD               1590
> #define DC_DIV_QR_THRESHOLD             100
> #define DC_DIVAPPR_Q_N_THRESHOLD         90
> #define INV_DIV_QR_THRESHOLD            465
> #define INV_DIVAPPR_Q_N_THRESHOLD        90
> #define DC_DIV_Q_THRESHOLD              136
> #define INV_DIV_Q_THRESHOLD            5581
> #define DC_DIVAPPR_Q_THRESHOLD          100
> #define INV_DIVAPPR_Q_THRESHOLD       12502
> #define DC_BDIV_QR_THRESHOLD            100
> #define DC_BDIV_Q_THRESHOLD              44
>
> /* fft_tuning -- autogenerated by tune-fft */
>
> #define FFT_TAB \
>    { { 4, 3 }, { 3, 2 }, { 3, 2 }, { 2, 1 }, { 1, 0 } }
>
> #define MULMOD_TAB \
>    { 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1 }
>
> #define FFT_N_NUM 19
>
> #define FFT_MULMOD_2EXPP1_CUTOFF 128
>
>
> /* Tuneup completed successfully, took 125 seconds */
>
>
There might have been some problems with CPU throttling above (look at the 
800 MHz CPU frequency in the make tune output).
Here is what I get when setting the cpufreq governor to performance (i.e. 
2.7 GHz).

[jp@jp-x220]% make tune
...
./tuneup
Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h
Using: CPU cycle counter, supplemented by microsecond getrusage()
speed_precision 1000000, speed_unittime 3.70e-10 secs, CPU freq 2701.00 MHz
DEFAULT_MAX_SIZE 1000, fft_max_size 50000

/* Generated by tuneup.c, 2012-10-24, gcc 4.7 */

#define MUL_KARATSUBA_THRESHOLD          16
#define MUL_TOOM3_THRESHOLD             105
#define MUL_TOOM4_THRESHOLD             246
#define MUL_TOOM8H_THRESHOLD            327

#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
#define SQR_KARATSUBA_THRESHOLD          31
#define SQR_TOOM3_THRESHOLD              61
#define SQR_TOOM4_THRESHOLD             178
#define SQR_TOOM8_THRESHOLD             240

#define POWM_THRESHOLD                  138

#define HGCD_THRESHOLD                   42
#define GCD_DC_THRESHOLD               2770
#define GCDEXT_DC_THRESHOLD            1788
#define JACOBI_BASE_METHOD                1

#define DIVREM_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* never */
#define DIVREM_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* never */
#define MOD_1_NORM_THRESHOLD              0  /* always */
#define MOD_1_UNNORM_THRESHOLD            0  /* always */
#define USE_PREINV_DIVREM_1               1  /* native */
#define USE_PREINV_MOD_1                  1
#define DIVEXACT_1_THRESHOLD              0  /* always */
#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
#define MOD_1_1_THRESHOLD                 7
#define MOD_1_2_THRESHOLD                 7
#define MOD_1_3_THRESHOLD                23
#define DIVREM_HENSEL_QR_1_THRESHOLD     31
#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD      5
#define DIVREM_EUCLID_HENSEL_THRESHOLD     15

#define ROOTREM_THRESHOLD                 6

#define GET_STR_DC_THRESHOLD             16
#define GET_STR_PRECOMPUTE_THRESHOLD     23
#define SET_STR_DC_THRESHOLD           6915
#define SET_STR_PRECOMPUTE_THRESHOLD   6915

#define MUL_FFT_FULL_THRESHOLD         3008

#define SQR_FFT_FULL_THRESHOLD         3520

#define MULLOW_BASECASE_THRESHOLD         7
#define MULLOW_DC_THRESHOLD              30
#define MULLOW_MUL_THRESHOLD           4570

#define MULHIGH_BASECASE_THRESHOLD       10
#define MULHIGH_DC_THRESHOLD             27
#define MULHIGH_MUL_THRESHOLD          2966

#define MULMOD_2EXPM1_THRESHOLD          20

#define FAC_UI_THRESHOLD               1605
#define DC_DIV_QR_THRESHOLD             100
#define DC_DIVAPPR_Q_N_THRESHOLD         91
#define INV_DIV_QR_THRESHOLD            465
#define INV_DIVAPPR_Q_N_THRESHOLD        91
#define DC_DIV_Q_THRESHOLD              130
#define INV_DIV_Q_THRESHOLD            5581
#define DC_DIVAPPR_Q_THRESHOLD          102
#define INV_DIVAPPR_Q_THRESHOLD       12637
#define DC_BDIV_QR_THRESHOLD            100
#define DC_BDIV_Q_THRESHOLD              42

/* fft_tuning -- autogenerated by tune-fft */

#define FFT_TAB \
   { { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } }

#define MULMOD_TAB \
   { 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1 }

#define FFT_N_NUM 23

#define FFT_MULMOD_2EXPP1_CUTOFF 128


/* Tuneup completed successfully, took 137 seconds */

These results are slightly different, but no more so than when I rerun make 
tune once more with the CPU stuck at max frequency; see below:

[jp@jp-x220]% make tune
...
./tuneup
Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h
Using: CPU cycle counter, supplemented by microsecond getrusage()
speed_precision 1000000, speed_unittime 3.70e-10 secs, CPU freq 2701.00 MHz
DEFAULT_MAX_SIZE 1000, fft_max_size 50000

/* Generated by tuneup.c, 2012-10-24, gcc 4.7 */

#define MUL_KARATSUBA_THRESHOLD          16
#define MUL_TOOM3_THRESHOLD             105
#define MUL_TOOM4_THRESHOLD             244
#define MUL_TOOM8H_THRESHOLD            303

#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
#define SQR_KARATSUBA_THRESHOLD          31
#define SQR_TOOM3_THRESHOLD              95
#define SQR_TOOM4_THRESHOLD             250
#define SQR_TOOM8_THRESHOLD             351

#define POWM_THRESHOLD                  138

#define HGCD_THRESHOLD                   37
#define GCD_DC_THRESHOLD               2587
#define GCDEXT_DC_THRESHOLD            1788
#define JACOBI_BASE_METHOD                1

#define DIVREM_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* never */
#define DIVREM_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* never */
#define MOD_1_NORM_THRESHOLD              0  /* always */
#define MOD_1_UNNORM_THRESHOLD            0  /* always */
#define USE_PREINV_DIVREM_1               1  /* native */
#define USE_PREINV_MOD_1                  1
#define DIVEXACT_1_THRESHOLD              0  /* always */
#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
#define MOD_1_1_THRESHOLD                 7
#define MOD_1_2_THRESHOLD                 7
#define MOD_1_3_THRESHOLD                23
#define DIVREM_HENSEL_QR_1_THRESHOLD     31
#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD      5
#define DIVREM_EUCLID_HENSEL_THRESHOLD    121

#define ROOTREM_THRESHOLD                 6

#define GET_STR_DC_THRESHOLD             17
#define GET_STR_PRECOMPUTE_THRESHOLD     23
#define SET_STR_DC_THRESHOLD           6915
#define SET_STR_PRECOMPUTE_THRESHOLD   8097

#define MUL_FFT_FULL_THRESHOLD         3008

#define SQR_FFT_FULL_THRESHOLD         3520

#define MULLOW_BASECASE_THRESHOLD         7
#define MULLOW_DC_THRESHOLD              30
#define MULLOW_MUL_THRESHOLD           4525

#define MULHIGH_BASECASE_THRESHOLD       10
#define MULHIGH_DC_THRESHOLD             30
#define MULHIGH_MUL_THRESHOLD          2966

#define MULMOD_2EXPM1_THRESHOLD          20

#define FAC_UI_THRESHOLD               1590
#define DC_DIV_QR_THRESHOLD             100
#define DC_DIVAPPR_Q_N_THRESHOLD         90
#define INV_DIV_QR_THRESHOLD            465
#define INV_DIVAPPR_Q_N_THRESHOLD        90
#define DC_DIV_Q_THRESHOLD               39
#define INV_DIV_Q_THRESHOLD            5581
#define DC_DIVAPPR_Q_THRESHOLD          104
#define INV_DIVAPPR_Q_THRESHOLD       14091
#define DC_BDIV_QR_THRESHOLD            100
#define DC_BDIV_Q_THRESHOLD              44

/* fft_tuning -- autogenerated by tune-fft */

#define FFT_TAB \
   { { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } }

#define MULMOD_TAB \
   { 4, 3, 3, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1 }

#define FFT_N_NUM 19

#define FFT_MULMOD_2EXPP1_CUTOFF 128


/* Tuneup completed successfully, took 124 seconds */

-- 
You received this message because you are subscribed to the Google Groups 
"mpir-devel" group.
To view this discussion on the web visit 
https://groups.google.com/d/msg/mpir-devel/-/jDgob3Czm8UJ.
To post to this group, send email to mpir-devel@googlegroups.com.
To unsubscribe from this group, send email to 
mpir-devel+unsubscr...@googlegroups.com.
For more options, visit this group at 
http://groups.google.com/group/mpir-devel?hl=en.

Reply via email to