------- Comment #5 from ubizjak at gmail dot com 2008-01-07 12:19 -------
Confirmed by the following testcase:
--cut here--
#include <stdio.h>

void __attribute__((noinline))
dtime (void)
{
  __asm__ __volatile__ ("" : : : "memory");
}

double sa, sb, sc, sd;
double one, two, four, five;
double piref, piprg, pierr;

int
main (int argc, char *argv[])
{
  double s, u, v, w, x;
  long i, m;

  piref = 3.14159265358979324;
  one = 1.0;
  two = 2.0;
  four = 4.0;
  five = 5.0;

  m = 512000000;

  dtime();

  s = -five;
  sa = -one;

  dtime();

  for (i = 1; i <= m; i++)
    {
      s = -s;
      sa = sa + s;
    }

  dtime();

  sc = (double) m;

  u = sa;
  v = 0.0;
  w = 0.0;
  x = 0.0;

  dtime();

  for (i = 1; i <= m; i++)
    {
      s = -s;
      sa = sa + s;
      u = u + two;
      x = x + (s - u);
      v = v - s * u;
      w = w + s / u;
    }

  dtime();

  m = (long) (sa * x / sc);
  sa = four * w / five;
  sb = sa + five / v;
  sc = 31.25;
  piprg = sb - sc / (v * v * v);
  pierr = piprg - piref;

  printf ("%13.4le\n", pierr);

  return 0;
}
--cut here--

.L5:
        xorb    $-128, -17(%ebp)        #, s
        addl    $1, %eax                #, i.65
        addsd   %xmm4, %xmm1            # two.16, u
        cmpl    $512000001, %eax        #, i.65
        movsd   -24(%ebp), %xmm0        # s, tmp90
        addsd   -24(%ebp), %xmm2        # s, sa_lsm.48
        mulsd   %xmm1, %xmm0            # u, tmp90
        subsd   %xmm0, %xmm3            # tmp90, v
        movsd   -24(%ebp), %xmm0        # s, tmp91
        divsd   %xmm1, %xmm0            # u, tmp91
        addsd   -16(%ebp), %xmm0        # w, tmp91
        movsd   %xmm0, -16(%ebp)        # tmp91, w
        jne     .L5                     #,

It is somehow possible to tolerate that "s" and "w" are not pushed into
registers due to non-existent live range splitting (PR 23322), but the main
problem here is that the sign of "s" is changed in memory by using an
(unaligned) xorb insn. The same situation occurs in the first (shorter) loop:

.L4:
        xorb    $-128, -17(%ebp)        #, s
        addl    $1, %eax                #, i
        cmpl    $512000001, %eax        #, i
        addsd   -24(%ebp), %xmm0        # s, sa_lsm.97
        jne     .L4                     #,

The performance regression is caused by a partial memory stall [1] (the
one-byte store to -17(%ebp) falls inside the 8-byte slot at -24(%ebp), which
is reloaded in full immediately afterwards).

[1] Agner Fog: How to optimize for the Pentium family of microprocessors,
    section 14.7


-- 

ubizjak at gmail dot com changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
     Ever Confirmed|0                           |1
   Last reconfirmed|0000-00-00 00:00:00         |2008-01-07 12:19:54
               date|                            |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34682