------- Comment #30 from bonzini at gnu dot org  2006-08-04 07:45 -------
Can you try this patch?  My only i686 machine is neutral to this problem.

I'm a bit worried about the Core Duo thing, but my hope is that other changes
between GCC 3 and GCC 4 improved performance on all machines, and Core Duo is
the only processor that does not see the performance loss introduced by "fld
%st".

I'm currently bootstrapping and regtesting the patch; a minimal testcase is
here:

/* { dg-do compile } */
/* { dg-options "-O2" } */

double a, b;
double f(double c)
{
  double x = a * b;
  return x + c * a;
}

/* { dg-final { scan-assembler-not "fld\[ \t\]*%st" } } */
/* { dg-final { scan-assembler "fmul\[ \t\]*%st" } } */

Without patch:
        fldl    a
        fld     %st(0)
        fmull   b
        fxch    %st(1)
        fmull   4(%esp)
        faddp   %st, %st(1)
        ret

With patch:
        fldl    a
        fldl    4(%esp)
        fmul    %st(1), %st
        fxch    %st(1)
        fmull   b
        faddp   %st, %st(1)
        ret

Index: i386.md
===================================================================
--- i386.md     (revision 115412)
+++ i386.md     (working copy)
@@ -18757,6 +18757,32 @@
   [(set_attr "type" "sseadd")
    (set_attr "mode" "DF")])

+;; Make two stack loads independent:
+;;   fld aa              fld aa
+;;   fld %st(0)     ->   fld bb
+;;   fmul bb             fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+(define_peephole2
+  [(set (match_operand 0 "fp_register_operand" "")
+       (match_operand 1 "fp_register_operand" ""))
+   (set (match_dup 0)
+       (match_operator 2 "binary_fp_operator"
+          [(match_dup 0)
+           (match_operand 3 "memory_operand" "")]))]
+  "REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 0) (match_dup 3))
+   (set (match_dup 0) (match_dup 4))]
+
+  ;; The % modifier is not operational anymore in peephole2's, so we have to
+  ;; swap the operands manually in the case of addition and multiplication.
+  "if (COMMUTATIVE_ARITH_P (operands[2]))
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+                                operands[0], operands[1]);
+   else
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+                                operands[1], operands[0]);")
+
 ;; Conditional addition patterns
 (define_expand "addqicc"
   [(match_operand:QI 0 "register_operand" "")


-- 

bonzini at gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |bonzini at gnu dot org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27827

Reply via email to