Hello Brian !
I just applied your changes to my fork of tcc (made fully reentrant, see
https://github.com/mingodad/tinycc) and tested them with the script shown
below, which builds sqlite3 with both tcc and gcc -O0.
The resulting sqlite3 binaries were tested by creating a database from
150MB of SQL, and the resulting databases were identical
(sqlite3-tcc/sqlite3-gcc). The final test shown here was then done with
an in-memory database instead of one on disk.
Result:
====
bin old new diff %reduction
(old/new)
--- --- --- ---- ----------
sqlite3-tcc 1449540 1412676 36864 2.60
sqlite3-gcc 1408080 1408080 0 0
====
Script to run the test:
====
# Directory holding the SQLite sources (sqlite3.c and shell.c) that doIt compiles.
sqlh=$HOME/dev/dadbiz++/third-party/dad/sqlite3-orig
# Print the "cpu MHz" lines found in /proc/cpuinfo.
showCPUSpeed() {
    grep '^[c]pu MHz' < /proc/cpuinfo
}
# Build sqlite3 once with ./tcc and once with gcc -O0, then time each
# resulting binary while it reads the SQL dump in $dbsql from stdin.
# showCPUSpeed is called before and after every step so the CPU clock
# readings appear next to the /usr/bin/time figures in the output.
doIt() {
    showCPUSpeed
    echo compiling sqlite3 with tcc
    # The link flags must be part of the same command: on a line of their
    # own the shell would try to execute "-lpthread" as a program.
    /usr/bin/time ./tcc -o sqlite3-tcc "$sqlh/sqlite3.c" "$sqlh/shell.c" \
        -lm -lpthread -ldl
    ls -l sqlite3-tcc

    # Alternative SQL dumps; exactly one dbsql assignment should be active.
    #dbsql=$HOME/dev/AMPL/dad/sql/y-mod-sql/y103-hard-dat.db.sql
    #dbsql=$HOME/dev/SquiLu/db-api/ourbiz.db.sql
    dbsql="$HOME/dev/SquiLu/db-api/companies_uk_RG.db.sql"

    # Database file name, only used by the commented-out on-disk variant.
    dbtcc=sdb-tcc.db
    #rm $dbtcc
    showCPUSpeed
    #/usr/bin/time ./sqlite3-tcc $dbtcc < $dbsql
    echo running sqlite3-tcc
    /usr/bin/time ./sqlite3-tcc < "$dbsql"
    showCPUSpeed

    echo compiling sqlite3 with gcc
    /usr/bin/time gcc -O0 -o sqlite3-gcc "$sqlh/sqlite3.c" "$sqlh/shell.c" \
        -lm -lpthread -ldl
    ls -l sqlite3-gcc

    # Database file name, only used by the commented-out on-disk variant.
    db0=sdb-gcc.db
    showCPUSpeed
    #rm $db0
    #/usr/bin/time ./sqlite3-gcc $db0 < $dbsql
    echo running sqlite3-gcc
    /usr/bin/time ./sqlite3-gcc < "$dbsql"
    showCPUSpeed
}
# Run the whole build-and-benchmark sequence twice in a row.
doIt
doIt
====
Output before changes:
====
cpu MHz : 1043.890
cpu MHz : 955.076
cpu MHz : 1368.369
cpu MHz : 1202.189
cpu MHz : 997.435
cpu MHz : 1118.064
cpu MHz : 1330.976
cpu MHz : 972.875
compiling sqlite3 with tcc
0.10user 0.01system 0:00.11elapsed 100%CPU (0avgtext+0avgdata
15840maxresident)k
0inputs+2832outputs (0major+3916minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1449540 abr 6 20:08 sqlite3-tcc
cpu MHz : 2776.711
cpu MHz : 2657.025
cpu MHz : 3145.106
cpu MHz : 3028.188
cpu MHz : 3135.120
cpu MHz : 2681.011
cpu MHz : 2891.677
cpu MHz : 2840.370
running sqlite3-tcc
12.10user 0.12system 0:12.23elapsed 99%CPU (0avgtext+0avgdata
180488maxresident)k
0inputs+2976outputs (0major+44895minor)pagefaults 0swaps
cpu MHz : 2712.282
cpu MHz : 2700.523
cpu MHz : 2700.011
cpu MHz : 2706.670
cpu MHz : 2699.597
cpu MHz : 2699.745
cpu MHz : 2699.178
cpu MHz : 2699.843
compiling sqlite3 with gcc
4.00user 0.15system 0:04.16elapsed 100%CPU (0avgtext+0avgdata
234052maxresident)k
0inputs+2752outputs (0major+87170minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1408080 abr 6 20:09 sqlite3-gcc
cpu MHz : 2978.379
cpu MHz : 2820.748
cpu MHz : 2880.415
cpu MHz : 2804.540
cpu MHz : 2853.414
cpu MHz : 2834.585
cpu MHz : 2811.112
cpu MHz : 2841.551
running sqlite3-gcc
10.86user 0.06system 0:10.93elapsed 100%CPU (0avgtext+0avgdata
179924maxresident)k
0inputs+2976outputs (0major+44880minor)pagefaults 0swaps
cpu MHz : 2700.364
cpu MHz : 2700.918
cpu MHz : 2699.985
cpu MHz : 2700.143
cpu MHz : 2699.869
cpu MHz : 2700.273
cpu MHz : 2700.024
cpu MHz : 2699.606
cpu MHz : 2700.364
cpu MHz : 2700.918
cpu MHz : 2699.985
cpu MHz : 2700.143
cpu MHz : 2699.869
cpu MHz : 2700.273
cpu MHz : 2700.024
cpu MHz : 2699.606
compiling sqlite3 with tcc
0.10user 0.00system 0:00.11elapsed 100%CPU (0avgtext+0avgdata
15952maxresident)k
0inputs+2832outputs (0major+3917minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1449540 abr 6 20:09 sqlite3-tcc
cpu MHz : 3143.321
cpu MHz : 2982.310
cpu MHz : 3187.922
cpu MHz : 2955.090
cpu MHz : 2770.887
cpu MHz : 2942.430
cpu MHz : 3012.071
cpu MHz : 3283.026
running sqlite3-tcc
12.04user 0.13system 0:12.18elapsed 99%CPU (0avgtext+0avgdata
180600maxresident)k
0inputs+2976outputs (0major+44895minor)pagefaults 0swaps
cpu MHz : 2700.171
cpu MHz : 2700.743
cpu MHz : 2700.004
cpu MHz : 2700.256
cpu MHz : 2700.178
cpu MHz : 2700.150
cpu MHz : 2699.892
cpu MHz : 2697.367
compiling sqlite3 with gcc
4.02user 0.13system 0:04.16elapsed 99%CPU (0avgtext+0avgdata
234080maxresident)k
0inputs+2752outputs (0major+87125minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1408080 abr 6 20:09 sqlite3-gcc
cpu MHz : 2799.667
cpu MHz : 2915.447
cpu MHz : 2942.032
cpu MHz : 2726.340
cpu MHz : 2654.825
cpu MHz : 2850.573
cpu MHz : 2919.901
cpu MHz : 2873.281
running sqlite3-gcc
10.74user 0.10system 0:10.85elapsed 99%CPU (0avgtext+0avgdata
179868maxresident)k
0inputs+2976outputs (0major+44884minor)pagefaults 0swaps
cpu MHz : 2700.370
cpu MHz : 2699.943
cpu MHz : 2700.177
cpu MHz : 2700.000
cpu MHz : 2700.023
cpu MHz : 2699.778
cpu MHz : 2699.937
cpu MHz : 2700.448
====
Output after changes:
====
cpu MHz : 1924.742
cpu MHz : 993.702
cpu MHz : 1634.113
cpu MHz : 1038.054
cpu MHz : 1496.319
cpu MHz : 1681.467
cpu MHz : 2188.883
cpu MHz : 945.493
compiling sqlite3 with tcc
0.09user 0.02system 0:00.11elapsed 100%CPU (0avgtext+0avgdata
15800maxresident)k
0inputs+2760outputs (0major+3918minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1412676 abr 6 20:11 sqlite3-tcc
cpu MHz : 3189.895
cpu MHz : 2801.464
cpu MHz : 3225.923
cpu MHz : 3189.898
cpu MHz : 2968.607
cpu MHz : 2662.467
cpu MHz : 2962.322
cpu MHz : 2780.141
running sqlite3-tcc
11.97user 0.13system 0:12.10elapsed 99%CPU (0avgtext+0avgdata
180468maxresident)k
0inputs+2976outputs (0major+44892minor)pagefaults 0swaps
cpu MHz : 2699.884
cpu MHz : 2700.476
cpu MHz : 2699.983
cpu MHz : 2703.341
cpu MHz : 2700.095
cpu MHz : 2700.744
cpu MHz : 2699.986
cpu MHz : 2700.232
compiling sqlite3 with gcc
4.03user 0.10system 0:04.13elapsed 100%CPU (0avgtext+0avgdata
234000maxresident)k
0inputs+2752outputs (0major+87143minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1408080 abr 6 20:11 sqlite3-gcc
cpu MHz : 3063.076
cpu MHz : 2904.942
cpu MHz : 2930.517
cpu MHz : 2914.045
cpu MHz : 2957.797
cpu MHz : 2916.358
cpu MHz : 2907.853
cpu MHz : 2933.959
running sqlite3-gcc
10.78user 0.10system 0:10.90elapsed 99%CPU (0avgtext+0avgdata
179872maxresident)k
0inputs+2976outputs (0major+44881minor)pagefaults 0swaps
cpu MHz : 2701.588
cpu MHz : 2700.007
cpu MHz : 2701.630
cpu MHz : 2699.928
cpu MHz : 2700.337
cpu MHz : 2699.619
cpu MHz : 2699.824
cpu MHz : 2700.400
cpu MHz : 2701.588
cpu MHz : 2700.007
cpu MHz : 2701.630
cpu MHz : 2699.928
cpu MHz : 2700.337
cpu MHz : 2699.619
cpu MHz : 2699.824
cpu MHz : 2700.400
compiling sqlite3 with tcc
0.10user 0.00system 0:00.11elapsed 99%CPU (0avgtext+0avgdata
15752maxresident)k
0inputs+2760outputs (0major+3916minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1412676 abr 6 20:11 sqlite3-tcc
cpu MHz : 3207.638
cpu MHz : 3100.938
cpu MHz : 2931.666
cpu MHz : 3233.458
cpu MHz : 3143.723
cpu MHz : 3139.950
cpu MHz : 3027.417
cpu MHz : 3045.425
running sqlite3-tcc
11.84user 0.12system 0:11.96elapsed 99%CPU (0avgtext+0avgdata
180432maxresident)k
0inputs+2976outputs (0major+44893minor)pagefaults 0swaps
cpu MHz : 2700.060
cpu MHz : 2700.690
cpu MHz : 2700.411
cpu MHz : 2701.015
cpu MHz : 2700.212
cpu MHz : 2699.936
cpu MHz : 2700.098
cpu MHz : 2700.684
compiling sqlite3 with gcc
4.00user 0.15system 0:04.16elapsed 100%CPU (0avgtext+0avgdata
234012maxresident)k
0inputs+2752outputs (0major+87185minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1408080 abr 6 20:12 sqlite3-gcc
cpu MHz : 2785.807
cpu MHz : 2720.240
cpu MHz : 2799.280
cpu MHz : 2784.467
cpu MHz : 2724.321
cpu MHz : 2907.124
cpu MHz : 2757.591
cpu MHz : 2718.077
running sqlite3-gcc
10.82user 0.12system 0:10.94elapsed 99%CPU (0avgtext+0avgdata
179916maxresident)k
0inputs+2976outputs (0major+44885minor)pagefaults 0swaps
cpu MHz : 2699.992
cpu MHz : 2700.192
cpu MHz : 2699.976
cpu MHz : 2701.052
cpu MHz : 2699.528
cpu MHz : 2700.650
cpu MHz : 2700.131
cpu MHz : 2700.006
====
Cheers !
On 6/4/22 18:06, Brian Callahan wrote:
Seeing as I've had all positive feedback on this, here's a more complete
diff that I think is suitable for committing.
It does the following:
1. Converts movl $0, %e{ax,cx,dx,sp,si,di} to xorl
%e{ax,cx,dx,sp,si,di}, %e{ax,cx,dx,sp,si,di}
2. Converts movq $0, %r{ax,cx,dx,sp,si,di} to xorl
%e{ax,cx,dx,sp,si,di}, %e{ax,cx,dx,sp,si,di}
There are two places where these idioms can be emitted, so it handles
both cases.
Here are some before and after .text size numbers:
bin old new diff %reduction
--- --- --- ---- ----------
tcc 328786 321358 7428 2.26
libtcc.a 307288 300252 7036 2.29
bcheck.o 23254 22801 453 1.95
bt-exe.o 4732 4550 182 3.85
bt-log.o 648 639 9 1.39
libtcc1.a 12678 12119 559 4.41
There is no change in compilation speed as far as I can measure.
There is an additional third location where a mov $0, %eax can be
emitted. It's in the form:
mov $0, %eax
jmp eb 05
mov $1, %eax
-or-
mov $1, %eax
jmp eb 05
mov $0, %eax
I could not find where this was happening, and the one place that looks
like it would be the place seems not to be. I don't think it impedes the
review and committing of this diff. And it'll give me something to do on
a rainy day if no one else beats me to it :)
At this point, I'd like any feedback on the diff below and/or
encouragement to commit it to mob.
Thanks.
~Brian
diff --git a/x86_64-gen.c b/x86_64-gen.c
index 81ec5d9..5085a0a 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -483,11 +483,21 @@ void load(int r, SValue *sv)
}
#endif
} else if (is64_type(ft)) {
- orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
- gen_le64(sv->c.i);
+ if (sv->c.i == 0 && r < 8) {
+ o(0x31); /* xor r, r */
+ o(0xc0 + REG_VALUE(r) * 9);
+ } else {
+ orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
+ gen_le64(sv->c.i);
+ }
} else {
- orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
- gen_le32(fc);
+ if (fc == 0 && r < 8) {
+ o(0x31); /* xor r, r */
+ o(0xc0 + REG_VALUE(r) * 9);
+ } else {
+ orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
+ gen_le32(fc);
+ }
}
} else if (v == VT_LOCAL) {
orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
@@ -1422,8 +1432,12 @@ void gfunc_call(int nb_args)
}
}
- if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or
FUNC_ELLIPSIS */
- oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov
nb_sse_args, %eax */
+ if (vtop->type.ref->f.func_type != FUNC_NEW) { /* implies FUNC_OLD
or FUNC_ELLIPSIS */
+ if (nb_sse_args == 0)
+ o(0xc031); /* xor eax, eax */
+ else
+ oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov
nb_sse_args, %eax */
+ }
gcall_or_jmp(0);
if (args_size)
gadd_sp(args_size);
_______________________________________________
Tinycc-devel mailing list
Tinycc-devel@nongnu.org
https://lists.nongnu.org/mailman/listinfo/tinycc-devel
_______________________________________________
Tinycc-devel mailing list
Tinycc-devel@nongnu.org
https://lists.nongnu.org/mailman/listinfo/tinycc-devel