Hello!

The attached patch prevents partial memory stalls for XFmode and DFmode
(32-bit) immediate->memory moves by penalizing the memory destination when
the source is an immediate. The patch also cleans up the move splitters
that handle immediate operands (most notably, since there are no FP subregs
after the reload pass, the ix86_split_long_move splitters can be simplified
and merged into one pattern).

2011-05-30  Uros Bizjak  <ubiz...@gmail.com>

        * config/i386/i386.md (*movxf_internal): Penalize FYx*r->o alternative
        to prevent partial memory stalls.  Move CONST_DOUBLEs directly to
        memory only for !TARGET_MEMORY_MISMATCH_STALL.
        (*movdf_internal_rex64): Do not penalize F->r alternative.
        (*movdf_internal): Penalize FYd*r->o alternative to prevent partial
        memory stalls.  Generate SSE and x87 CONST_DOUBLE immediates only
        when optimizing function for size.  Move CONST_DOUBLEs directly
        to memory only for !TARGET_MEMORY_MISMATCH_STALL.
        (FP move splitters): Merge {TF,XF,DF}mode move splitters.  Do not
        handle SUBREGs.  Do not check for MEM_P operands in the insn condition,
        check for ANY_FP_REGNO_P instead.
        * config/i386/constraints.md (Yd): Enable GENERAL_REGS for
        TARGET_64BIT and for TARGET_INTEGER_DFMODE_MOVES when optimizing
        function for speed.
        * config/i386/i386.c (ix86_option_override_internal): Do not
        set TARGET_INTEGER_DFMODE_MOVES here.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}, and committed to mainline SVN.

Uros.
Index: i386.md
===================================================================
--- i386.md     (revision 174435)
+++ i386.md     (working copy)
@@ -2833,8 +2833,8 @@
   "ix86_expand_move (<MODE>mode, operands); DONE;")
 
 (define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
-       (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r ,?o")
+       (match_operand:TF 1 "general_operand"      "xm,x,C,roF,Fr"))]
   "TARGET_SSE2
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
@@ -2877,24 +2877,19 @@
                   (const_string "TI"))]
               (const_string "DI")))])
 
-(define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-        (match_operand:TF 1 "general_operand" ""))]
-  "reload_completed
-   && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
+;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movxf_internal"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,o")
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,!o")
        (match_operand:XF 1 "general_operand"      "fm,f,G,Yx*roF,FYx*r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || (optimize_function_for_size_p (cfun)
-          && standard_80387_constant_p (operands[1]) > 0)
-       || memory_operand (operands[0], XFmode))"
+          && standard_80387_constant_p (operands[1]) > 0
+          && !memory_operand (operands[0], XFmode))
+       || (!TARGET_MEMORY_MISMATCH_STALL
+          && memory_operand (operands[0], XFmode)))"
 {
   switch (which_alternative)
     {
@@ -2905,8 +2900,10 @@
     case 2:
       return standard_80387_constant_opcode (operands[1]);
 
-    case 3: case 4:
+    case 3:
+    case 4:
       return "#";
+
     default:
       gcc_unreachable ();
     }
@@ -2914,25 +2911,11 @@
   [(set_attr "type" "fmov,fmov,fmov,multi,multi")
    (set_attr "mode" "XF,XF,XF,SI,SI")])
 
-(define_split
-  [(set (match_operand:XF 0 "nonimmediate_operand" "")
-       (match_operand:XF 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ! (FP_REG_P (operands[0]) ||
-        (GET_CODE (operands[0]) == SUBREG
-         && FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (FP_REG_P (operands[1]) ||
-        (GET_CODE (operands[1]) == SUBREG
-         && FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movdf_internal_rex64"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-               "=f,m,f,r ,m,!r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+               "=f,m,f,r ,m,r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
        (match_operand:DF 1 "general_operand"
-               "fm,f,G,rm,r,F ,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+               "fm,f,G,rm,r,F,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -3080,21 +3063,20 @@
 ;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movdf_internal"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-               "=f,m,f,Yd*r  ,o    ,Y2*x,Y2*x,Y2*x,m  ")
+               "=f,m,f,Yd*r  ,!o   ,Y2*x,Y2*x,Y2*x,m  ")
        (match_operand:DF 1 "general_operand"
                "fm,f,G,Yd*roF,FYd*r,C   ,Y2*x,m   ,Y2*x"))]
   "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
-       || (!TARGET_INTEGER_DFMODE_MOVES
+       || (optimize_function_for_size_p (cfun)
           && ((!(TARGET_SSE2 && TARGET_SSE_MATH)
                && standard_80387_constant_p (operands[1]) > 0)
               || (TARGET_SSE2 && TARGET_SSE_MATH
                   && standard_sse_constant_p (operands[1])))
           && !memory_operand (operands[0], DFmode))
-       || ((TARGET_INTEGER_DFMODE_MOVES
-           || !TARGET_MEMORY_MISMATCH_STALL)
+       || (!TARGET_MEMORY_MISMATCH_STALL
           && memory_operand (operands[0], DFmode)))"
 {
   switch (which_alternative)
@@ -3215,20 +3197,6 @@
              ]
              (const_string "DF")))])
 
-(define_split
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-       (match_operand:DF 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ! (ANY_FP_REG_P (operands[0]) ||
-        (GET_CODE (operands[0]) == SUBREG
-         && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (ANY_FP_REG_P (operands[1]) ||
-        (GET_CODE (operands[1]) == SUBREG
-         && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movsf_internal"
   [(set (match_operand:SF 0 "nonimmediate_operand"
          "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
@@ -3331,31 +3299,19 @@
   [(set (match_operand 0 "register_operand" "")
        (match_operand 1 "memory_operand" ""))]
   "reload_completed
-   && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == DFmode
        || GET_MODE (operands[0]) == SFmode)
+   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
   rtx c = operands[2];
-  rtx r = operands[0];
-
-  if (GET_CODE (r) == SUBREG)
-    r = SUBREG_REG (r);
+  int r = REGNO (operands[0]);
 
-  if (SSE_REG_P (r))
-    {
-      if (!standard_sse_constant_p (c))
-       FAIL;
-    }
-  else if (FP_REG_P (r))
-    {
-      if (standard_80387_constant_p (c) < 1)
-       FAIL;
-    }
-  else if (MMX_REG_P (r))
+  if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c))
+      || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1))
     FAIL;
 })
 
@@ -3363,31 +3319,18 @@
   [(set (match_operand 0 "register_operand" "")
        (float_extend (match_operand 1 "memory_operand" "")))]
   "reload_completed
-   && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
-       || GET_MODE (operands[0]) == DFmode
-       || GET_MODE (operands[0]) == SFmode)
+       || GET_MODE (operands[0]) == DFmode)
+   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
   rtx c = operands[2];
-  rtx r = operands[0];
-
-  if (GET_CODE (r) == SUBREG)
-    r = SUBREG_REG (r);
+  int r = REGNO (operands[0]);
 
-  if (SSE_REG_P (r))
-    {
-      if (!standard_sse_constant_p (c))
-       FAIL;
-    }
-  else if (FP_REG_P (r))
-    {
-      if (standard_80387_constant_p (c) < 1)
-       FAIL;
-    }
-  else if (MMX_REG_P (r))
+  if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c))
+      || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1))
     FAIL;
 })
 
@@ -3411,6 +3354,17 @@
     operands[1] = CONST1_RTX (<MODE>mode);
 })
 
+(define_split
+  [(set (match_operand 0 "nonimmediate_operand" "")
+        (match_operand 1 "general_operand" ""))]
+  "reload_completed
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
+       || GET_MODE (operands[0]) == DFmode)
+   && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
 (define_insn "swapxf"
   [(set (match_operand:XF 0 "register_operand" "+f")
        (match_operand:XF 1 "register_operand" "+f"))
@@ -16650,7 +16604,7 @@
   [(set (match_operand:SWI 0 "push_operand" "")
        (match_operand:SWI 1 "memory_operand" ""))
    (match_scratch:SWI 2 "<r>")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
@@ -16661,7 +16615,7 @@
   [(set (match_operand:SF 0 "push_operand" "")
        (match_operand:SF 1 "memory_operand" ""))
    (match_scratch:SF 2 "r")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
@@ -16813,7 +16767,7 @@
                      [(match_dup 0)
                       (match_operand:SI 1 "memory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
@@ -16826,7 +16780,7 @@
                      [(match_operand:SI 1 "memory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
@@ -16879,7 +16833,7 @@
                      [(match_dup 0)
                       (match_operand:SI 1 "nonmemory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    /* Do not split stack checking probes.  */
    && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
   [(set (match_dup 2) (match_dup 0))
@@ -16895,7 +16849,7 @@
                      [(match_operand:SI 1 "nonmemory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    /* Do not split stack checking probes.  */
    && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
   [(set (match_dup 2) (match_dup 0))
Index: constraints.md
===================================================================
--- constraints.md      (revision 174435)
+++ constraints.md      (working copy)
@@ -108,7 +108,9 @@
  "@internal Any MMX register, when inter-unit moves are enabled.")
 
 (define_register_constraint "Yd"
- "TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS"
+ "(TARGET_64BIT
+   || (TARGET_INTEGER_DFMODE_MOVES && optimize_function_for_speed_p (cfun)))
+  ? GENERAL_REGS : NO_REGS"
  "@internal Any integer register when integer DFmode moves are enabled.")
 
 (define_register_constraint "Yx"
Index: i386.c
===================================================================
--- i386.c      (revision 174435)
+++ i386.c      (working copy)
@@ -3947,13 +3947,6 @@ ix86_option_override_internal (bool main
   if (!TARGET_80387)
     target_flags |= MASK_NO_FANCY_MATH_387;
 
-  /* On 32bit targets, avoid moving DFmode values in
-     integer registers when optimizing for size.  */
-  if (TARGET_64BIT)
-    target_flags |= TARGET_INTEGER_DFMODE_MOVES;
-  else if (optimize_size)
-    target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
-
   /* Turn on MMX builtins for -msse.  */
   if (TARGET_SSE)
     {

Reply via email to