Hello!

The maskmovdqu and monitor insns have implicit address operands, so when
Pmode != word_mode (x32 with -maddress-mode=short) we have to instruct the
assembler to emit the 0x67 address-size override prefix (addr32).

The patch also changes the *sse3_monitor pattern to emit the mnemonic with
implicit operands, to avoid duplicating the operands in reverse order for
Intel syntax.

2013-08-13  Uros Bizjak  <ubiz...@gmail.com>

    * config/i386/sse.md (*sse2_maskmovdqu): Emit addr32 prefix
    when Pmode != word_mode.  Add length_address attribute.
    (sse3_monitor_<mode>): Merge from sse3_monitor and
    sse3_monitor64_<mode> insn patterns.  Emit addr32 prefix when
    Pmode != word_mode.  Update insn length attribute.
    * config/i386/i386.c (ix86_option_override_internal): Update
    ix86_gen_monitor selection for merged sse3_monitor insn.

Patch was tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.
Index: i386.c
===================================================================
--- i386.c      (revision 201689)
+++ i386.c      (working copy)
@@ -4170,24 +4170,19 @@ ix86_option_override_internal (bool main_args_p)
       ix86_gen_leave = gen_leave_rex64;
       if (Pmode == DImode)
        {
-         ix86_gen_monitor = gen_sse3_monitor64_di;
          ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
          ix86_gen_tls_local_dynamic_base_64
            = gen_tls_local_dynamic_base_64_di;
        }
       else
        {
-         ix86_gen_monitor = gen_sse3_monitor64_si;
          ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
          ix86_gen_tls_local_dynamic_base_64
            = gen_tls_local_dynamic_base_64_si;
        }
     }
   else
-    {
-      ix86_gen_leave = gen_leave;
-      ix86_gen_monitor = gen_sse3_monitor;
-    }
+    ix86_gen_leave = gen_leave;
 
   if (Pmode == DImode)
     {
@@ -4199,6 +4194,7 @@ ix86_option_override_internal (bool main_args_p)
       ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
       ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
       ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
+      ix86_gen_monitor = gen_sse3_monitor_di;
     }
   else
     {
@@ -4210,6 +4206,7 @@ ix86_option_override_internal (bool main_args_p)
       ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
       ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
       ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
+      ix86_gen_monitor = gen_sse3_monitor_si;
     }
 
 #ifdef USE_IX86_CLD
Index: sse.md
===================================================================
--- sse.md      (revision 201689)
+++ sse.md      (working copy)
@@ -7731,9 +7731,17 @@
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
   "TARGET_SSE2"
-  "%vmaskmovdqu\t{%2, %1|%1, %2}"
+{
+  /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
+     that requires %v to be at the beginning of the opcode name.  */
+  if (Pmode != word_mode)
+    fputs ("\taddr32", asm_out_file);
+  return "%vmaskmovdqu\t{%2, %1|%1, %2}";
+}
   [(set_attr "type" "ssemov")
    (set_attr "prefix_data16" "1")
+   (set (attr "length_address")
+     (symbol_ref ("Pmode != word_mode")))
    ;; The implicit %rdi operand confuses default length_vex computation.
    (set (attr "length_vex")
      (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
@@ -7781,26 +7789,18 @@
   "mwait"
   [(set_attr "length" "3")])
 
-(define_insn "sse3_monitor"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
-                    (match_operand:SI 1 "register_operand" "c")
-                    (match_operand:SI 2 "register_operand" "d")]
-                   UNSPECV_MONITOR)]
-  "TARGET_SSE3 && !TARGET_64BIT"
-  "monitor\t%0, %1, %2"
-  [(set_attr "length" "3")])
-
-(define_insn "sse3_monitor64_<mode>"
+(define_insn "sse3_monitor_<mode>"
   [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
-  "TARGET_SSE3 && TARGET_64BIT"
+  "TARGET_SSE3"
 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
 ;; RCX and RDX are used.  Since 32bit register operands are implicitly
 ;; zero extended to 64bit, we only need to set up 32bit registers.
-  "monitor"
-  [(set_attr "length" "3")])
+  "%^monitor"
+  [(set (attr "length")
+     (symbol_ref ("(Pmode != word_mode) + 3")))])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;

Reply via email to