The testcase from PR 8126 runs ~20% slower when compiled with -mfpmath=sse:
--cut here--
#include <stdio.h>

typedef float real;

int
main (int argc, char *argv[])
{
  int i;

  real v1x, v1y, v1z;
  real v2x, v2y, v2z;
  real v3x, v3y, v3z;

  printf ("Start?\n");

  v1x = 1.;
  v1y = 0.;
  v1z = 0.;

  v2x = 0.;
  v2y = 1.;
  v2z = 0.;

  for (i = 0; i < 100000000; i++)
    {
      v3x = v1y * v2z - v1z * v2y;
      v3y = v1z * v2x - v1x * v2z;
      v3z = v1x * v2y - v1y * v2x;

      v1x = v2x;
      v1y = v2y;
      v1z = v2z;

      v2x = v3x;
      v2y = v3y;
      v2z = v3z;
    }

  printf ("Stop!\n");
  printf ("Result = %f, %f, %f\n", v3x, v3y, v3z);

  return 0;
}
--cut here--

gcc -O3 -march=pentium4

real    0m0.603s
user    0m0.602s
sys     0m0.002s

gcc -O3 -march=pentium4 -mfpmath=sse

real    0m0.726s
user    0m0.727s
sys     0m0.000s

-- 
Summary: Floating point computation far slower for -mfpmath=sse
Product: gcc
Version: 4.0.0
Status: UNCONFIRMED
Severity: normal
Priority: P2
Component: target
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: uros at kss-loka dot si
CC: gcc-bugs at gcc dot gnu dot org
GCC build triplet: i686-pc-linux-gnu
GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19780