Hello!

The attached patch optimizes ix86_atomic_assign_expand_fenv by using the
register form "fnstsw %ax" instead of the memory form "fnstsw <mem>". This
removes a memory read/write cycle for the temporary.

2014-07-12  Uros Bizjak  <ubiz...@gmail.com>

    * config/i386/i386-builtin-types.def: Add USHORT_FTYPE_VOID.
    Remove VOID_FTYPE_PUSHORT.
    * config/i386/i386.c (bdesc_special_args) <__builtin_ia32_fnstsw>:
    Change code to USHORT_FTYPE_VOID.
    (ix86_expand_special_args_builtin): Handle USHORT_FTYPE_VOID.
    (ix86_expand_builtin): Remove IX86_BUILTIN_FNSTSW handling.
    (ix86_atomic_assign_expand_fenv): Update for
    __builtin_ia32_fnstsw changes.
    * config/i386/i386.md (x86_fnstsw_1): Set length unconditionally to 2.
    (fnstsw): Change operand 0 to nonimmediate operand.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} and
committed to mainline SVN.

Uros.
Index: config/alpha/alpha.c
===================================================================
--- config/alpha/alpha.c        (revision 212477)
+++ config/alpha/alpha.c        (working copy)
@@ -9888,6 +9888,72 @@ alpha_canonicalize_comparison (int *code, rtx *op0
       *op1 = GEN_INT (255);
     }
 }
+
+/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
+
+static void
+alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
+
+  tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
+  tree new_fenv_var, reload_fenv, restore_fnenv;
+  tree update_call, atomic_feraiseexcept, hold_fnclex;
+
+  /* Assume OSF/1 compatible interfaces.  */
+  if (!TARGET_ABI_OSF)
+    return;
+
+  /* Generate the equivalent of :
+       unsigned long fenv_var;
+       fenv_var = __ieee_get_fp_control ();
+
+       unsigned long masked_fenv;
+       masked_fenv = fenv_var & mask;
+
+       __ieee_set_fp_control (masked_fenv);  */
+
+  fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
+  get_fpscr
+    = build_fn_decl ("__ieee_get_fp_control",
+                    build_function_type_list (long_unsigned_type_node, NULL));
+  set_fpscr
+    = build_fn_decl ("__ieee_set_fp_control",
+                    build_function_type_list (void_type_node, NULL));
+  mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
+  ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
+                   fenv_var, build_call_expr (get_fpscr, 0));
+  masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
+  hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+                 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+                 hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __ieee_set_fp_control (masked_fenv);  */
+
+  *clear = build_call_expr (set_fpscr, 1, masked_fenv);
+
+  /* Generate the equivalent of :
+       unsigned long new_fenv_var;
+       new_fenv_var = __ieee_get_fp_control ();
+
+       __ieee_set_fp_control (fenv_var);
+
+       __atomic_feraiseexcept (new_fenv_var);  */
+
+  new_fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
+  reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
+                       build_call_expr (get_fpscr, 0));
+  restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call
+    = build_call_expr (atomic_feraiseexcept, 1,
+                      fold_convert (integer_type_node, new_fenv_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+                   build2 (COMPOUND_EXPR, void_type_node,
+                           reload_fenv, restore_fnenv), update_call);
+}
 
 /* Initialize the GCC target structure.  */
 #if TARGET_ABI_OPEN_VMS
@@ -10060,6 +10126,9 @@ alpha_canonicalize_comparison (int *code, rtx *op0
 #undef TARGET_CANONICALIZE_COMPARISON
 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 
Index: config/i386/i386-builtin-types.def
===================================================================
--- config/i386/i386-builtin-types.def  (revision 212477)
+++ config/i386/i386-builtin-types.def  (working copy)
@@ -162,6 +162,7 @@ DEF_POINTER_TYPE (PCV16SI, V16SI, CONST)
 DEF_FUNCTION_TYPE (FLOAT128)
 DEF_FUNCTION_TYPE (UINT64)
 DEF_FUNCTION_TYPE (UNSIGNED)
+DEF_FUNCTION_TYPE (USHORT)
 DEF_FUNCTION_TYPE (INT)
 DEF_FUNCTION_TYPE (VOID)
 DEF_FUNCTION_TYPE (PVOID)
@@ -256,7 +257,6 @@ DEF_FUNCTION_TYPE (VOID, PCVOID)
 DEF_FUNCTION_TYPE (VOID, PVOID)
 DEF_FUNCTION_TYPE (VOID, UINT64)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED)
-DEF_FUNCTION_TYPE (VOID, PUSHORT)
 DEF_FUNCTION_TYPE (INT, PUSHORT)
 DEF_FUNCTION_TYPE (INT, PUNSIGNED)
 DEF_FUNCTION_TYPE (INT, PULONGLONG)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 212477)
+++ config/i386/i386.c  (working copy)
@@ -28994,7 +28994,7 @@ static const struct builtin_description bdesc_spec
   /* 80387 (for use internally for atomic compound assignment).  */
   { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
   { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
-  { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
+  { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
   { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
 
   /* MMX */
@@ -34598,6 +34598,7 @@ ix86_expand_special_args_builtin (const struct bui
       break;
 
     case INT_FTYPE_VOID:
+    case USHORT_FTYPE_VOID:
     case UINT64_FTYPE_VOID:
     case UNSIGNED_FTYPE_VOID:
       nargs = 0;
@@ -35283,7 +35284,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
     case IX86_BUILTIN_FXRSTOR64:
     case IX86_BUILTIN_FNSTENV:
     case IX86_BUILTIN_FLDENV:
-    case IX86_BUILTIN_FNSTSW:
       mode0 = BLKmode;
       switch (fcode)
        {
@@ -35305,10 +35305,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
        case IX86_BUILTIN_FLDENV:
          icode = CODE_FOR_fldenv;
          break;
-       case IX86_BUILTIN_FNSTSW:
-         icode = CODE_FOR_fnstsw;
-         mode0 = HImode;
-         break;
        default:
          gcc_unreachable ();
        }
@@ -46894,15 +46890,14 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *
                      hold_fnclex);
       *clear = build_call_expr (fnclex, 0);
       tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
-      mark_addressable (sw_var);
-      tree su_ptr = build_pointer_type (short_unsigned_type_node);
-      tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
-      tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
+      tree fnstsw_call = build_call_expr (fnstsw, 0);
+      tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
+                           sw_var, fnstsw_call);
       tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
       tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
                                exceptions_var, exceptions_x87);
       *update = build2 (COMPOUND_EXPR, integer_type_node,
-                       fnstsw_call, update_mod);
+                       sw_mod, update_mod);
       tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
       *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
     }
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 212477)
+++ config/i386/i386.md (working copy)
@@ -1532,8 +1532,7 @@
        (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
   "TARGET_80387"
   "fnstsw\t%0"
-  [(set (attr "length")
-       (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+  [(set_attr "length" "2")
    (set_attr "mode" "SI")
    (set_attr "unit" "i387")])
 
@@ -18114,12 +18113,12 @@
         (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
 
 (define_insn "fnstsw"
-  [(set (match_operand:HI 0 "memory_operand" "=m")
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
        (unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
   "TARGET_80387"
   "fnstsw\t%0"
-  [(set_attr "type" "other")
-   (set_attr "memory" "store")
+  [(set_attr "type" "other,other")
+   (set_attr "memory" "none,store")
    (set (attr "length")
         (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
 

Reply via email to