[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-06 Thread steven at gcc dot gnu dot org

--- Additional Comments From steven at gcc dot gnu dot org  2005-06-06 
07:16 ---
Could L1 icache blow-out be the reason? 

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-06 Thread rakdver at atrey dot karlin dot mff dot cuni dot cz

--- Additional Comments From rakdver at atrey dot karlin dot mff dot cuni 
dot cz  2005-06-06 07:30 ---
Subject: Re:  openssl is slower when compiled with gcc 4.0 than 3.3

> Could L1 icache blow-out be the reason?

This is not likely with the minimized example.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-06 Thread giovannibajo at libero dot it

--- Additional Comments From giovannibajo at libero dot it  2005-06-06 
13:33 ---
Uhm, at this point, I don't believe anymore that the loop I posted is the cause 
of the regression. Maybe the regression is somewhere else. I'll investigate.

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-06 Thread giovannibajo at libero dot it

--- Additional Comments From giovannibajo at libero dot it  2005-06-06 
14:40 ---
Looks like the culprit is this:

=
static unsigned int S[256];
unsigned
md2_block (unsigned int *sp1, unsigned int *sp2, const unsigned char *d)
{
register unsigned int t;
register int i, j;
static unsigned int state[48];

j = sp2[16 - 1];
for (i = 0; i < 16; i++)
{
state[i] = sp1[i];
state[i + 16] = t = d[i];
state[i + 32] = (t ^ sp1[i]);
j = sp2[i] ^= S[t ^ j];
}
}
=


gcc 3.4.3 -fPIC -O2:
===
.L5:
movl8(%ebp), %esi
movl(%esi,%ecx,4), %eax
movl%eax, [EMAIL PROTECTED](%ebx,%ecx,4)
movl16(%ebp), %edx
movzbl  (%edx,%ecx), %eax
movl%eax, [EMAIL PROTECTED](%ebx,%ecx,4)
movl(%esi,%ecx,4), %edx
xorl%eax, %edx
movl-16(%ebp), %esi
xorl-20(%ebp), %eax
movl%edx, [EMAIL PROTECTED](%ebx,%ecx,4)
movl(%esi,%eax,4), %eax
xorl(%edi,%ecx,4), %eax
movl%eax, (%edi,%ecx,4)
incl%ecx
cmpl$15, %ecx
movl%eax, -20(%ebp)
jle .L5
===



gcc 4.1.0 20050529 -fPIC -O2:
===
.L2:
movl8(%ebp), %eax
leal0(,%edi,4), %ecx
movl%ecx, -28(%ebp)
addl%ecx, %eax
movl16(%ebp), %ecx
movl%eax, %edx
movl%eax, -24(%ebp)
movl-4(%eax), %eax
movl%eax, (%esi)
movzbl  -1(%ecx,%edi), %eax
incl%edi
movl%eax, 64(%esi)
movl-4(%edx), %ecx
movl12(%ebp), %edx
xorl%eax, %ecx
movl%ecx, 128(%esi)
movl-28(%ebp), %ecx
addl$4, %esi
addl%edx, %ecx
movl-16(%ebp), %edx
xorl%edx, %eax
movl-20(%ebp), %edx
movl(%edx,%eax,4), %eax
movl-4(%ecx), %edx
xorl%edx, %eax
cmpl$17, %edi
movl%eax, -4(%ecx)
movl%eax, -16(%ebp)
jne .L2
===


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-06 Thread rakdver at atrey dot karlin dot mff dot cuni dot cz

--- Additional Comments From rakdver at atrey dot karlin dot mff dot cuni 
dot cz  2005-06-06 15:00 ---
Subject: Re:  openssl is slower when compiled with gcc 4.0 than 3.3

> Looks like the culprit is this:
>
> =
> static unsigned int S[256];
> unsigned
> md2_block (unsigned int *sp1, unsigned int *sp2, const unsigned char *d)
> {
>   register unsigned int t;
>   register int i, j;
>   static unsigned int state[48];
>
>   j = sp2[16 - 1];
>   for (i = 0; i < 16; i++)
>   {
>   state[i] = sp1[i];
>   state[i + 16] = t = d[i];
>   state[i + 32] = (t ^ sp1[i]);
>   j = sp2[i] ^= S[t ^ j];
>   }
> }
> =

with the TARGET_MEM_REFs patch the result is much better.  At
least we avoid the multiplication by 4

   leal0(,%edi,4), %ecx

and other results of the DOM misoptimization of addressing modes, that was
one of the main motivations for TARGET_MEM_REFs.

We still use one more iv than in the 3.4 case, and as a result we need one
more register.

.L2:
movl8(%ebp), %edi
movl-4(%edi,%ecx,4), %eax
movl%eax, (%esi)
movl16(%ebp), %edx
movzbl  -1(%ecx,%edx), %eax
movl%eax, 64(%esi)
movl-4(%edi,%ecx,4), %edx
xorl%eax, %edx
movl%edx, 128(%esi)
xorl-20(%ebp), %eax
movl-16(%ebp), %edi
movl(%edi,%eax,4), %eax
movl12(%ebp), %edx
xorl-4(%edx,%ecx,4), %eax
movl%eax, -4(%edx,%ecx,4)
movl%eax, -20(%ebp)
incl%ecx
addl$4, %esi
cmpl$17, %ecx
jne .L2


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-02 Thread rakdver at atrey dot karlin dot mff dot cuni dot cz

--- Additional Comments From rakdver at atrey dot karlin dot mff dot cuni 
dot cz  2005-06-02 08:01 ---
Subject: Re:  openssl is slower when compiled with gcc 4.0 than 3.3

The assembler attributed to 4.0 was produced by mainline (or some
patched version of 4.0), wasn't it?
Otherwise I cannot imagine why the inner loop would be unrolled.

For plain 4.0, we get the following code, which seems just fine
and equivalent to the one obtained with 3.4 (one of the memory
references is strength reduced, but since we still fit into registers,
this is OK).

Right now I don't see whether there is a problem with the code
produced by 4.1, but I also don't see anything related to addressing
mode selection there.

.L21:
movl[EMAIL PROTECTED](%ebx,%eax,4), %eax
xorl(%edx), %eax
movl%eax, (%edx)
movl[EMAIL PROTECTED](%ebx,%eax,4), %eax
xorl4(%edx), %eax
movl%eax, 4(%edx)
movl[EMAIL PROTECTED](%ebx,%eax,4), %eax
xorl8(%edx), %eax
movl%eax, 8(%edx)
movl[EMAIL PROTECTED](%ebx,%eax,4), %eax
xorl12(%edx), %eax
movl%eax, 12(%edx)
movl[EMAIL PROTECTED](%ebx,%eax,4), %eax
xorl16(%edx), %eax
movl%eax, 16(%edx)
movl[EMAIL PROTECTED](%ebx,%eax,4), %eax
xorl20(%edx), %eax
movl%eax, 20(%edx)
movl[EMAIL PROTECTED](%ebx,%eax,4), %eax
xorl24(%edx), %eax
movl%eax, 24(%edx)
movl[EMAIL PROTECTED](%ebx,%eax,4), %eax
xorl28(%edx), %eax
movl%eax, 28(%edx)
addl$32, %edx
leal-12(%ebp), %esi
cmpl%esi, %edx
jne .L21



-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-01 Thread yx at cs dot ucla dot edu

--- Additional Comments From yx at cs dot ucla dot edu  2005-06-01 20:47 
---
When we ran 'openssl speed md2', we did see that gcc-4.0 was slower
than earlier versions, so we created a minimal test case, which
we will attach.  Here is how long it took to run a 34 megabyte
file through the test program when compiled with various compilers and
options:

gcc-2.95.3 -fPIC -O1 4.940s
gcc-4.0.0  -fPIC -O1 3.510s
gcc-3.4.3  -fPIC -O1 5.190s

gcc-2.95.3 -fPIC -O2 3.470s
gcc-3.4.3  -fPIC -O2 3.460s
gcc-4.0.0  -fPIC -O2 4.050s

gcc-2.95.3 -fPIC -O3 3.400s
gcc-3.4.3  -fPIC -O3 3.740s
gcc-4.0.0  -fPIC -O3 4.010s

This test was done on a pentium 4 workstation, and no smoothing was
done on the resulting times, but they seemed to be repeatable.
We also tried without -fPIC, but did not see as large a regression there.

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-01 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-06-01 
20:55 ---
I would not be surprised if this is just GCC not using the i386 addressing modes.

-- 
   What|Removed |Added

 Status|RESOLVED|UNCONFIRMED
 Resolution|INVALID |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-01 Thread giovannibajo at libero dot it

--- Additional Comments From giovannibajo at libero dot it  2005-06-01 
22:55 ---
Confirmed. The regression appears only with -fPIC, and it's pretty evident. The 
core is md2_block, the inner loop:

GCC 3.4
=
.L29:
xorl%edx, %edx
.p2align 2,,3
.L28:
movl[EMAIL PROTECTED](%ebx,%eax,4), %esi
xorl-216(%ebp,%edx,4), %esi
movl[EMAIL PROTECTED](%ebx,%esi,4), %eax
xorl-212(%ebp,%edx,4), %eax
movl[EMAIL PROTECTED](%ebx,%eax,4), %edi
xorl-208(%ebp,%edx,4), %edi
movl%esi, -216(%ebp,%edx,4)
movl[EMAIL PROTECTED](%ebx,%edi,4), %esi
xorl-204(%ebp,%edx,4), %esi
movl%eax, -212(%ebp,%edx,4)
movl[EMAIL PROTECTED](%ebx,%esi,4), %eax
xorl-200(%ebp,%edx,4), %eax
movl%edi, -208(%ebp,%edx,4)
movl[EMAIL PROTECTED](%ebx,%eax,4), %edi
xorl-196(%ebp,%edx,4), %edi
movl%esi, -204(%ebp,%edx,4)
movl[EMAIL PROTECTED](%ebx,%edi,4), %esi
xorl-192(%ebp,%edx,4), %esi
movl%eax, -200(%ebp,%edx,4)
movl[EMAIL PROTECTED](%ebx,%esi,4), %eax
xorl-188(%ebp,%edx,4), %eax
movl%edi, -196(%ebp,%edx,4)
movl%esi, -192(%ebp,%edx,4)
movl%eax, -188(%ebp,%edx,4)
addl$8, %edx
cmpl$47, %edx
jle .L28
addl%ecx, %eax
incl%ecx
andl$255, %eax
cmpl$17, %ecx
jle .L29
=



GCC 4.0
=
.L16:
movl-384(%ebp), %eax
movl-208(%ebp), %esi
incl-384(%ebp)
addl%esi, %eax
movl-456(%ebp), %esi
andl$255, %eax
movl(%edi,%eax,4), %ecx
movl-464(%ebp), %eax
xorl%ecx, %esi
movl(%edi,%esi,4), %edx
movl%esi, -368(%ebp)
movl%esi, -456(%ebp)
movl-488(%ebp), %esi
xorl%edx, %eax
movl-472(%ebp), %edx
movl(%edi,%eax,4), %ecx
movl(%edi,%eax,4), %ecx
movl%eax, -364(%ebp)
movl%eax, -464(%ebp)
xorl%ecx, %edx
movl-480(%ebp), %ecx
movl(%edi,%edx,4), %eax
movl%edx, -360(%ebp)
movl%edx, -472(%ebp)
xorl%eax, %ecx
movl(%edi,%ecx,4), %eax
movl%ecx, -356(%ebp)
movl%ecx, -480(%ebp)
xorl%eax, %esi
movl-496(%ebp), %eax
movl(%edi,%esi,4), %edx
movl%esi, -352(%ebp)
movl%esi, -488(%ebp)
xorl%edx, %eax
movl-504(%ebp), %edx
movl(%edi,%eax,4), %ecx
movl%eax, -348(%ebp)
movl%eax, -496(%ebp)
xorl%ecx, %edx
movl-512(%ebp), %ecx
movl(%edi,%edx,4), %eax
movl%edx, -344(%ebp)
movl%edx, -504(%ebp)
xorl%eax, %ecx
movl%ecx, -340(%ebp)
movl(%edi,%ecx,4), %eax
movl-520(%ebp), %esi
movl%ecx, -512(%ebp)
xorl%eax, %esi
movl-528(%ebp), %eax
movl(%edi,%esi,4), %edx
movl%esi, -336(%ebp)
movl%esi, -520(%ebp)
movl-552(%ebp), %esi
xorl%edx, %eax
movl-536(%ebp), %edx
movl(%edi,%eax,4), %ecx
movl%eax, -332(%ebp)
movl%eax, -528(%ebp)
xorl%ecx, %edx
movl-544(%ebp), %ecx
movl(%edi,%edx,4), %eax
movl%edx, -328(%ebp)
movl%edx, -536(%ebp)
xorl%eax, %ecx
movl(%edi,%ecx,4), %eax
movl%ecx, -324(%ebp)
movl%ecx, -544(%ebp)
xorl%eax, %esi
movl-556(%ebp), %eax
movl(%edi,%esi,4), %edx
movl%esi, -320(%ebp)
movl%esi, -552(%ebp)
movl-568(%ebp), %esi
xorl%edx, %eax
movl-560(%ebp), %edx
movl(%edi,%eax,4), %ecx
movl%eax, -316(%ebp)
movl%eax, -556(%ebp)
xorl%ecx, %edx
movl-564(%ebp), %ecx
movl(%edi,%edx,4), %eax
movl%edx, -312(%ebp)
movl%edx, -560(%ebp)
xorl%eax, %ecx
movl(%edi,%ecx,4), %eax
movl%ecx, -308(%ebp)
movl%ecx, -564(%ebp)
xorl%eax, %esi
movl%esi, -304(%ebp)
movl(%edi,%esi,4), %edx
movl-572(%ebp), %eax
movl%esi, -568(%ebp)
movl-396(%ebp), %esi
xorl%edx, %eax
movl-576(%ebp), %edx
movl(%edi,%eax,4), %ecx
movl%eax, -300(%ebp)
movl%eax, -572(%ebp)
xorl%ecx, %edx
movl-580(%ebp), %ecx

[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-06-01 Thread giovannibajo at libero dot it

--- Additional Comments From giovannibajo at libero dot it  2005-06-01 
22:59 ---
I wonder if this is fixed by TARGET_MEM_REF.

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-05-14 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-05-14 
20:36 ---
No feedback in 3 months.

-- 
   What|Removed |Added

 Status|WAITING |RESOLVED
 Resolution||INVALID


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-02-12 Thread pinskia at gcc dot gnu dot org


-- 
   What|Removed |Added

  Component|c   |target
   Keywords||missed-optimization


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923


[Bug target/19923] openssl is slower when compiled with gcc 4.0 than 3.3

2005-02-12 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-02-12 
22:24 ---
We need a self contained example.

-- 
   What|Removed |Added

 CC||pinskia at gcc dot gnu dot
   ||org
 Status|UNCONFIRMED |WAITING


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923