------- Comment #1 from hjl dot tools at gmail dot com  2008-10-29 05:44 -------
It looks like the costs of loading/storing FP values aren't appropriate for
Core 2. With this patch:

[EMAIL PROTECTED] i386]$ diff -up i386.c.foo i386.c
--- i386.c.foo  2008-10-28 21:56:19.000000000 -0700
+++ i386.c      2008-10-28 22:01:53.000000000 -0700
@@ -990,9 +990,9 @@ struct processor_costs core2_cost = {
                                           Relative to reg-reg move (2).  */
   {4, 4, 4},                           /* cost of storing integer registers */
   2,                                   /* cost of reg,reg fld/fst */
-  {6, 6, 6},                           /* cost of loading fp registers
+  {12, 12, 12},                                /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
-  {4, 4, 4},                           /* cost of storing fp registers
+  {6, 6, 8},                           /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
   2,                                   /* cost of moving MMX register */
   {6, 6},                              /* cost of loading MMX registers
@@ -1000,9 +1000,9 @@ struct processor_costs core2_cost = {
   {4, 4},                              /* cost of storing MMX registers
                                           in SImode and DImode */
   2,                                   /* cost of moving SSE register */
-  {6, 6, 6},                           /* cost of loading SSE registers
+  {8, 8, 8},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
-  {4, 4, 4},                           /* cost of storing SSE registers
+  {8, 8, 8},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
   2,                                   /* MMX or SSE register to integer */
   32,                                  /* size of l1 cache.  */
[EMAIL PROTECTED] i386]$

I got

[EMAIL PROTECTED] gcc]$  ./xgcc -B./ -m32 -O2 /tmp/foo.c -o core2.sse 
-mtune=core2 
-msse3 -mfpmath=sse
[EMAIL PROTECTED] gcc]$  ./xgcc -B./ -m32 -O2 /tmp/foo.c -o core2 -mtune=core2
[EMAIL PROTECTED] gcc]$ ./xgcc -B./ -m32 -O2 /tmp/foo.c -o o2  -msse3 
-mfpmath=sse
[EMAIL PROTECTED] gcc]$  ./xgcc -B./ -m32 -O2 /tmp/foo.c -o o2.sse
[EMAIL PROTECTED] gcc]$ time ./o2

real    0m7.163s
user    0m7.161s
sys     0m0.001s
[EMAIL PROTECTED] gcc]$ time ./core2

real    0m7.833s
user    0m7.829s
sys     0m0.001s
[EMAIL PROTECTED] gcc]$ time ./o2.sse

real    0m7.795s
user    0m7.794s
sys     0m0.000s
[EMAIL PROTECTED] gcc]$ time ./core2.sse

real    0m7.339s
user    0m7.337s
sys     0m0.001s
[EMAIL PROTECTED] gcc]$

But even with this patch, IRA still generates slower code:

[EMAIL PROTECTED] gcc]$ ./xgcc -B./ -m32 -O2 /tmp/foo.c -o core2.noira 
-mtune=core2
-fno-ira
[EMAIL PROTECTED] gcc]$ time ./core2.noira

real    0m7.444s
user    0m7.441s
sys     0m0.001s
[EMAIL PROTECTED] gcc]$ ./xgcc -B./ -m32 -O2 /tmp/foo.c -o core2.sse.noira
-mtune=core2 -fno-ira -msse3 -mfpmath=sse
[EMAIL PROTECTED] gcc]$ time ./core2.sse.noira

real    0m7.229s
user    0m7.224s
sys     0m0.000s
[EMAIL PROTECTED] gcc]$


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37948

Reply via email to