Here are the results for a Bulldozer CPU (combinat on the Sage cluster).
For the second run I removed x86_64/sqr_basecase.asm, which was the assembly file used.
Note that without it, the speed executable segfaults...

################################################################################
# With assembly file
################################################################################
jpflori@combinat:~/build/mpir/tune$ ./speed -s 1-100 -t 1 mpn_sqr_basecase
overhead 0.000000002 secs, precision 1000000 units of 4.35e-10 secs, CPU freq 2300.19 MHz
        mpn_sqr_basecase
1         0.000000005
2         0.000000009
3         0.000000014
4         0.000000028
5         0.000000038
6         0.000000052
7         0.000000065
8         0.000000082
9         0.000000098
10        0.000000118
11        0.000000140
12        0.000000160
13        0.000000185
14        0.000000216
15        0.000000233
16        0.000000285
17        0.000000311
18        0.000000339
19        0.000000376
20        0.000000427
21        0.000000457
22        0.000000507
23        0.000000550
24        0.000000594
25        0.000000620
26        0.000000704
27        0.000000764
28        0.000000816
29        0.000000890
30        0.000000962
31        0.000000993
32        0.000001096
33        0.000001119
34        0.000001199
35        0.000001262
36        0.000001330
37        0.000001413
38        0.000001521
39        0.000001572
40        0.000001620
41        0.000001731
42        0.000001827
43        0.000001891
44        0.000001972
45        0.000002099
46        0.000002192
47        0.000002264
48        0.000002393
49        0.000002494
50        0.000002572
51        0.000002682
52        0.000002784
53        0.000002873
54        0.000002950
55        0.000003086
56        0.000003208
57        0.000003314
58        0.000003470
59        0.000003597
60        0.000003734
61        0.000003824
62        0.000003925
63        0.000004042
64        0.000004244
65        0.000004308
66        0.000004404
67        0.000004577
68        0.000004737
69        0.000004907
70        0.000004991
71        0.000005139
72        0.000005274
73        0.000005442
74        0.000005549
75        0.000005704
76        0.000005921
77        0.000006020
78        0.000006226
79        0.000006342
80        0.000006742
81        0.000006855
82        0.000007019
83        0.000007181
84        0.000007380
85        0.000007529
86        0.000007674
87        0.000007733
88        0.000008123
89        0.000008130
90        0.000008421
91        0.000008468
92        0.000008748
93        0.000008864
94        0.000009011
95        0.000009163
96        0.000009462
97        0.000009550
98        0.000009791
99        0.000009950
100       0.000010140

./tuneup
Parameters for ./mpn/x86_64/k8/k10/k102/gmp-mparam.h
Using: CPU cycle counter, supplemented by microsecond getrusage()
speed_precision 1000000, speed_unittime 4.35e-10 secs, CPU freq 2300.19 MHz
DEFAULT_MAX_SIZE 1000, fft_max_size 50000

/* Generated by tuneup.c, 2014-02-17, gcc 4.6 */

#define MUL_KARATSUBA_THRESHOLD          14
#define MUL_TOOM3_THRESHOLD              89
#define MUL_TOOM4_THRESHOLD             238
#define MUL_TOOM8H_THRESHOLD            351

#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
#define SQR_KARATSUBA_THRESHOLD          20
#define SQR_TOOM3_THRESHOLD             122
#define SQR_TOOM4_THRESHOLD             680
#define SQR_TOOM8_THRESHOLD             680

#define POWM_THRESHOLD                  128

#define HGCD_THRESHOLD                   91
#define GCD_DC_THRESHOLD                418
#define GCDEXT_DC_THRESHOLD             351
#define JACOBI_BASE_METHOD                1

#define DIVREM_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* never */
#define DIVREM_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* never */
#define MOD_1_NORM_THRESHOLD              0  /* always */
#define MOD_1_UNNORM_THRESHOLD            0  /* always */
#define USE_PREINV_DIVREM_1               1  /* native */
#define USE_PREINV_MOD_1                  1
#define DIVEXACT_1_THRESHOLD              0  /* always */
#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
#define MOD_1_1_THRESHOLD                 7
#define MOD_1_2_THRESHOLD                 7
#define MOD_1_3_THRESHOLD                20
#define DIVREM_HENSEL_QR_1_THRESHOLD     13
#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD     13
#define DIVREM_EUCLID_HENSEL_THRESHOLD     24

#define ROOTREM_THRESHOLD                 6

#define GET_STR_DC_THRESHOLD             12
#define GET_STR_PRECOMPUTE_THRESHOLD     19
#define SET_STR_DC_THRESHOLD            454
#define SET_STR_PRECOMPUTE_THRESHOLD    538

#define MUL_FFT_FULL_THRESHOLD         3904

#define SQR_FFT_FULL_THRESHOLD         3392

#define MULLOW_BASECASE_THRESHOLD         0  /* always */
#define MULLOW_DC_THRESHOLD              33
#define MULLOW_MUL_THRESHOLD           5565

#define MULHIGH_BASECASE_THRESHOLD        7
#define MULHIGH_DC_THRESHOLD             29
#define MULHIGH_MUL_THRESHOLD          3336

#define MULMOD_2EXPM1_THRESHOLD          14

#define FAC_UI_THRESHOLD               8671
#define DC_DIV_QR_THRESHOLD              52
#define DC_DIVAPPR_Q_N_THRESHOLD         12
#define INV_DIV_QR_THRESHOLD           2130
#define INV_DIVAPPR_Q_N_THRESHOLD        12
#define DC_DIV_Q_THRESHOLD              122
#define INV_DIV_Q_THRESHOLD            6039
#define DC_DIVAPPR_Q_THRESHOLD           41
#define INV_DIVAPPR_Q_THRESHOLD       16039
#define DC_BDIV_QR_THRESHOLD             54
#define DC_BDIV_Q_THRESHOLD              35

/* fft_tuning -- autogenerated by tune-fft */

#define FFT_TAB \
   { { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } }

#define MULMOD_TAB \
   { 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1 }

#define FFT_N_NUM 19

#define FFT_MULMOD_2EXPP1_CUTOFF 256


/* Tuneup completed successfully, took 166 seconds */


################################################################################
# Without assembly file
################################################################################

jpflori@combinat:~/build/mpir/tune$ ./speed -s 1-100 -t 1 mpn_sqr_basecase
overhead 0.000000002 secs, precision 1000000 units of 4.35e-10 secs, CPU freq 2300.19 MHz
        mpn_sqr_basecase
1         0.000000008
2         0.000000016
3         0.000000046
4         0.000000061
5         0.000000079
6         0.000000097
7         0.000000125
8         0.000000144
9         0.000000171
10        0.000000189
11        0.000000237
12        0.000000262
13        0.000000293
14        0.000000335
15        0.000000368
16        0.000000395
17        0.000000468
18        0.000000487
19        0.000000545
20        0.000000580
21        0.000000663
22        0.000000684
23        0.000000741
24        0.000000805
25        0.000000893
26        0.000000888
27        0.000001022
28        0.000001021
29        0.000001138
30        0.000001152
31        0.000001260
Segmentation fault (core dumped)

-- 
You received this message because you are subscribed to the Google Groups 
"mpir-devel" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to mpir-devel+unsubscr...@googlegroups.com.
To post to this group, send email to mpir-devel@googlegroups.com.
Visit this group at http://groups.google.com/group/mpir-devel.
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to