------- Comment #30 from bonzini at gnu dot org 2006-08-04 07:45 -------
Can you try this patch? My only i686 machine is not affected by this problem.
I'm a bit worried about the Core Duo thing, but my hope is that other changes between GCC 3 and GCC 4 improved performance on all machines, and Core Duo is the only processor that does not see the performance loss introduced by "fld %st".

I'm currently bootstrapping and regtesting the patch; a minimal testcase is here:

/* { dg-do compile } */
/* { dg-options "-O2" } */

double a, b;
double f(double c)
{
  double x = a * b;
  return x + c * a;
}

/* { dg-final { scan-assembler-not "fld\[ \t\]*%st" } } */
/* { dg-final { scan-assembler "fmul\[ \t\]*%st" } } */

Without patch:

        fldl    a
        fld     %st(0)
        fmull   b
        fxch    %st(1)
        fmull   4(%esp)
        faddp   %st, %st(1)
        ret

With patch:

        fldl    a
        fldl    4(%esp)
        fmul    %st(1), %st
        fxch    %st(1)
        fmull   b
        faddp   %st, %st(1)
        ret

Index: i386.md
===================================================================
--- i386.md (revision 115412)
+++ i386.md (working copy)
@@ -18757,6 +18757,32 @@
   [(set_attr "type" "sseadd")
    (set_attr "mode" "DF")])
 
+;; Make two stack loads independent:
+;;   fld aa              fld aa
+;;   fld %st(0)     ->   fld bb
+;;   fmul bb             fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+(define_peephole2
+  [(set (match_operand 0 "fp_register_operand" "")
+        (match_operand 1 "fp_register_operand" ""))
+   (set (match_dup 0)
+        (match_operator 2 "binary_fp_operator"
+           [(match_dup 0)
+            (match_operand 3 "memory_operand" "")]))]
+  "REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 0) (match_dup 3))
+   (set (match_dup 0) (match_dup 4))]
+
+  ;; The % modifier is not operational anymore in peephole2's, so we have to
+  ;; swap the operands manually in the case of addition and multiplication.
+  "if (COMMUTATIVE_ARITH_P (operands[2]))
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+                                 operands[0], operands[1]);
+   else
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+                                 operands[1], operands[0]);")
+
 ;; Conditional addition patterns
 (define_expand "addqicc"
   [(match_operand:QI 0 "register_operand" "")

-- 

bonzini at gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |bonzini at gnu dot org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27827