Hi,

The current x32 implementation uses LEAs to convert 32bit addresses to
64bit.  However, we can use the addr32 prefix to use 32bit addresses
directly.  It improves performance by 5% in SPEC CPU 2K/2006.  All changes
are done in the x86 backend, except for a small unwind library assert change:

http://gcc.gnu.org/ml/gcc-patches/2011-11/msg01555.html

due to return column size difference.

For x86-64, Pmode can be 32bit or 64bit, but word_mode is always 64bit.
push/pop only work on word_mode.  Also string instructions take Pmode
pointers.

I will submit a set of patches to use a 32bit Pmode for x32.  This is
the first patch to properly use Pmode and word_mode.  It also adds
the addr32 prefix to string instructions if needed.  OK for trunk?

Thanks.


H.J.
---
2011-11-11  H.J. Lu  <hongjiu...@intel.com>

        * config/i386/i386.c (function_value_64): Return pointers in
        word_mode instead of Pmode.
        (ix86_promote_function_mode): Likewise.
        (setup_incoming_varargs_64): Use word_mode with integer
        parameters in registers.
        (gen_push): Push register in word_mode instead of Pmode.
        (ix86_emit_save_regs): Likewise.
        (ix86_emit_save_regs_using_mov): Save integer registers in
        word_mode.
        (gen_pop): Pop register in word_mode instead of Pmode.
        (ix86_emit_restore_regs_using_pop): Likewise.
        (ix86_expand_prologue): Replace Pmode with word_mode for push
        immediate.  Use ix86_gen_pro_epilogue_adjust_stack.  Save and
        restore RAX and R10 in word_mode.
        (ix86_emit_restore_regs_using_mov): Restore integer registers
        in word_mode.
        (ix86_expand_split_stack_prologue): Save R10_REG and restore in
        word_mode.
        (ix86_decompose_address): Disallow fs:(reg) if Pmode !=
        word_mode. 
        (legitimize_tls_address): Load TP into register for
        TLS_MODEL_INITIAL_EXEC and TLS_MODEL_LOCAL_EXEC modes in x32.
        (ix86_print_operand): Output register in DImode for 64bit
        indirect branch.
        (ix86_split_to_parts): Use word_mode with PUT_MODE for push.
        (ix86_split_long_move): Likewise.
        (ix86_zero_extend_to_Pmode): Handle Pmode != DImode.
        (ix86_expand_movmem): Use word_mode for size needed for loop.
        (ix86_trampoline_init): Use movl for 64bit if ptr_mode == SImode.
        Replace DImode with Pmode or ptr_mode.
        (x86_this_parameter): Replace DImode with Pmode.

        * config/i386/i386.md (W): New.
        (*push<mode>2_prologue): Replace :P with :W.
        (*pop<mode>1): Likewise.
        (*pop<mode>1_epilogue): Likewise.
        (*rep_movdi_rex64): Replace :DI with :P.  Add addr32 if needed.
        (*rep_stosdi_rex64): Likewise.
        (*rep_movsi): Add addr32 if needed.
        (*rep_movqi): Likewise.
        (*rep_stossi): Likewise.
        (*rep_stosqi): Likewise.
        (*cmpstrnqi_nz_1): Likewise.
        (*cmpstrnqi_1): Likewise.
        (*strlenqi_1): Likewise.
        (push/pop peephole2): Use word_mode scratch registers.
        (lwp_slwpcb): Check Pmode instead of TARGET_64BIT.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 01f4fbe..fd82389 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7193,8 +7193,8 @@ function_value_64 (enum machine_mode orig_mode, enum 
machine_mode mode,
     }
   else if (POINTER_TYPE_P (valtype))
     {
-      /* Pointers are always returned in Pmode. */
-      mode = Pmode;
+      /* Pointers are always returned in word_mode.  */
+      mode = word_mode;
     }
 
   ret = construct_container (mode, orig_mode, valtype, 1,
@@ -7265,7 +7265,8 @@ ix86_function_value (const_tree valtype, const_tree 
fntype_or_decl,
   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
 }
 
-/* Pointer function arguments and return values are promoted to Pmode.  */
+/* Pointer function arguments and return values are promoted to
+   word_mode.  */
 
 static enum machine_mode
 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
@@ -7275,7 +7276,7 @@ ix86_promote_function_mode (const_tree type, enum 
machine_mode mode,
   if (type != NULL_TREE && POINTER_TYPE_P (type))
     {
       *punsignedp = POINTERS_EXTEND_UNSIGNED;
-      return Pmode;
+      return word_mode;
     }
   return default_promote_function_mode (type, mode, punsignedp, fntype,
                                        for_return);
@@ -7553,12 +7554,13 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
 
   for (i = cum->regno; i < max; i++)
     {
-      mem = gen_rtx_MEM (Pmode,
+      mem = gen_rtx_MEM (word_mode,
                         plus_constant (save_area, i * UNITS_PER_WORD));
       MEM_NOTRAP_P (mem) = 1;
       set_mem_alias_set (mem, set);
-      emit_move_insn (mem, gen_rtx_REG (Pmode,
-                                       x86_64_int_parameter_registers[i]));
+      emit_move_insn (mem,
+                     gen_rtx_REG (word_mode,
+                                  x86_64_int_parameter_registers[i]));
     }
 
   if (ix86_varargs_fpr_size)
@@ -8613,8 +8615,11 @@ gen_push (rtx arg)
     m->fs.cfa_offset += UNITS_PER_WORD;
   m->fs.sp_offset += UNITS_PER_WORD;
 
+  if (REG_P (arg) && GET_MODE (arg) != word_mode)
+    arg = gen_rtx_REG (word_mode, REGNO (arg));
+
   return gen_rtx_SET (VOIDmode,
-                     gen_rtx_MEM (Pmode,
+                     gen_rtx_MEM (word_mode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
@@ -8625,9 +8630,12 @@ gen_push (rtx arg)
 static rtx
 gen_pop (rtx arg)
 {
+  if (REG_P (arg) && GET_MODE (arg) != word_mode)
+    arg = gen_rtx_REG (word_mode, REGNO (arg));
+
   return gen_rtx_SET (VOIDmode,
                      arg,
-                     gen_rtx_MEM (Pmode,
+                     gen_rtx_MEM (word_mode,
                                   gen_rtx_POST_INC (Pmode,
                                                     stack_pointer_rtx)));
 }
@@ -9094,7 +9102,7 @@ ix86_emit_save_regs (void)
   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
       {
-       insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+       insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
       }
 }
@@ -9174,7 +9182,7 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
       {
-        ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
+        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
       }
 }
@@ -10055,7 +10063,7 @@ ix86_expand_prologue (void)
         to implement macro RETURN_ADDR_RTX and intrinsic function
         expand_builtin_return_addr etc.  */
       t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
-      t = gen_frame_mem (Pmode, t);
+      t = gen_frame_mem (word_mode, t);
       insn = emit_insn (gen_push (t));
       RTX_FRAME_RELATED_P (insn) = 1;
 
@@ -10252,14 +10260,18 @@ ix86_expand_prologue (void)
       if (r10_live && eax_live)
         {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
-         emit_move_insn (r10, gen_frame_mem (Pmode, t));
+         emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
+                         gen_frame_mem (word_mode, t));
          t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
-         emit_move_insn (eax, gen_frame_mem (Pmode, t));
+         emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
+                         gen_frame_mem (word_mode, t));
        }
       else if (eax_live || r10_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
-         emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
+         emit_move_insn (gen_rtx_REG (word_mode,
+                                      (eax_live ? AX_REG : R10_REG)),
+                         gen_frame_mem (word_mode, t));
        }
     }
   gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
@@ -10429,7 +10441,7 @@ ix86_emit_restore_regs_using_pop (void)
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
-      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
+      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
 }
 
 /* Emit code and notes for the LEAVE instruction.  */
@@ -10472,11 +10484,11 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT 
cfa_offset,
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
       {
-       rtx reg = gen_rtx_REG (Pmode, regno);
+       rtx reg = gen_rtx_REG (word_mode, regno);
        rtx insn, mem;
 
        mem = choose_baseaddr (cfa_offset);
-       mem = gen_frame_mem (Pmode, mem);
+       mem = gen_frame_mem (word_mode, mem);
        insn = emit_move_insn (reg, mem);
 
         if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
@@ -11060,8 +11072,8 @@ ix86_expand_split_stack_prologue (void)
        {
          rtx rax;
 
-         rax = gen_rtx_REG (Pmode, AX_REG);
-         emit_move_insn (rax, reg10);
+         rax = gen_rtx_REG (word_mode, AX_REG);
+         emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
          use_reg (&call_fusage, rax);
        }
 
@@ -11140,8 +11152,8 @@ ix86_expand_split_stack_prologue (void)
   /* If we are in 64-bit mode and this function uses a static chain,
      we saved %r10 in %rax before calling _morestack.  */
   if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
-    emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
-                   gen_rtx_REG (Pmode, AX_REG));
+    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
+                   gen_rtx_REG (word_mode, AX_REG));
 
   /* If this function calls va_start, we need to store a pointer to
      the arguments on the old stack, because they may not have been
@@ -11388,6 +11400,11 @@ ix86_decompose_address (rtx addr, struct ix86_address 
*out)
   else
     disp = addr;                       /* displacement */
 
+  /* Since address override works only on the (reg) part in fs:(reg),
+     we can't use it as memory operand.  */
+  if (Pmode != word_mode && seg == SEG_FS && (base || index))
+    return 0;
+
   if (index)
     {
       if (REG_P (index))
@@ -12501,8 +12518,19 @@ legitimize_tls_address (rtx x, enum tls_model model, 
bool for_mov)
       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
-         off = force_reg (Pmode, off);
-         return gen_rtx_PLUS (Pmode, base, off);
+         if (Pmode != word_mode)
+           {
+             /* Since address override works only on the (reg) part in
+                fs:(reg), we can't use it as memory operand.  */
+             rtx reg = gen_reg_rtx (Pmode);
+             emit_move_insn (reg, base);
+             return gen_rtx_PLUS (Pmode, reg, off);
+           }
+         else
+           {
+             off = force_reg (Pmode, off);
+             return gen_rtx_PLUS (Pmode, base, off);
+           }
        }
       else
        {
@@ -13637,7 +13665,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
              gcc_unreachable ();
            }
 
-         ix86_print_operand (file, x, 0);
+         ix86_print_operand (file, x,
+                             TARGET_64BIT && REG_P (x) ? 'q' : 0);
          return;
 
 
@@ -19987,7 +20016,7 @@ ix86_split_to_parts (rtx operand, rtx *parts, enum 
machine_mode mode)
       gcc_assert (ok);
 
       operand = copy_rtx (operand);
-      PUT_MODE (operand, Pmode);
+      PUT_MODE (operand, word_mode);
       parts[0] = parts[1] = parts[2] = parts[3] = operand;
       return size;
     }
@@ -20140,7 +20169,7 @@ ix86_split_long_move (rtx operands[])
       if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
-         PUT_MODE (operands[0], Pmode);
+         PUT_MODE (operands[0], word_mode);
        }
       else
         operands[0] = gen_lowpart (DImode, operands[0]);
@@ -20701,7 +20730,11 @@ ix86_zero_extend_to_Pmode (rtx exp)
   if (GET_MODE (exp) == Pmode)
     return copy_to_mode_reg (Pmode, exp);
   r = gen_reg_rtx (Pmode);
-  emit_insn (gen_zero_extendsidi2 (r, exp));
+  if (Pmode == DImode)
+    emit_insn (gen_zero_extendsidi2 (r, exp));
+  else
+    emit_move_insn (r,
+                   simplify_gen_subreg (Pmode, exp, GET_MODE (exp), 0));
   return r;
 }
 
@@ -21730,11 +21763,11 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, 
rtx align_exp,
       gcc_unreachable ();
     case loop:
       need_zero_guard = true;
-      size_needed = GET_MODE_SIZE (Pmode);
+      size_needed = GET_MODE_SIZE (word_mode);
       break;
     case unrolled_loop:
       need_zero_guard = true;
-      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
+      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
       break;
     case rep_prefix_8_byte:
       size_needed = 8;
@@ -23979,10 +24012,13 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain_value)
       /* Load the function address to r11.  Try to load address using
         the shorter movl instead of movabs.  We may want to support
         movq for kernel mode, but kernel does not use trampolines at
-        the moment.  */
-      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
+        the moment.  FNADDR is a 32bit address and may not be in
+        DImode when ptr_mode == SImode.  Always use movl in this
+        case.  */
+      if (ptr_mode == SImode
+         || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
-         fnaddr = copy_to_mode_reg (DImode, fnaddr);
+         fnaddr = copy_to_mode_reg (Pmode, fnaddr);
 
          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
@@ -24001,9 +24037,9 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain_value)
          offset += 10;
        }
 
-      /* Load static chain using movabs to r10.  Use the
-        shorter movl instead of movabs for x32.  */
-      if (TARGET_X32)
+      /* Load static chain using movabs to r10.  Use the shorter movl
+         instead of movabs when ptr_mode == SImode.  */
+      if (ptr_mode == SImode)
        {
          opcode = 0xba41;
          size = 6;
@@ -31077,7 +31113,7 @@ x86_this_parameter (tree function)
         parm_regs = x86_64_ms_abi_int_parameter_registers;
       else
         parm_regs = x86_64_int_parameter_registers;
-      return gen_rtx_REG (DImode, parm_regs[aggr]);
+      return gen_rtx_REG (Pmode, parm_regs[aggr]);
     }
 
   nregs = ix86_function_regparm (type, function);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 11c866b..9d2d084 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -973,6 +973,11 @@
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
 
+;; This mode iterator allows :W to be used for patterns that operate on
+;; word_mode sized quantities.
+(define_mode_iterator W
+  [(SI "word_mode == SImode") (DI "word_mode == DImode")])
+
 ;; This mode iterator allows :PTR to be used for patterns that operate on
 ;; ptr_mode sized quantities.
 (define_mode_iterator PTR
@@ -1781,8 +1786,8 @@
    (set_attr "mode" "SI")])
 
 (define_insn "*push<mode>2_prologue"
-  [(set (match_operand:P 0 "push_operand" "=<")
-       (match_operand:P 1 "general_no_elim_operand" "r<i>*m"))
+  [(set (match_operand:W 0 "push_operand" "=<")
+       (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))
    (clobber (mem:BLK (scratch)))]
   ""
   "push{<imodesuffix>}\t%1"
@@ -1790,16 +1795,16 @@
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*pop<mode>1"
-  [(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
-       (match_operand:P 1 "pop_operand" ">"))]
+  [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
+       (match_operand:W 1 "pop_operand" ">"))]
   ""
   "pop{<imodesuffix>}\t%0"
   [(set_attr "type" "pop")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*pop<mode>1_epilogue"
-  [(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
-       (match_operand:P 1 "pop_operand" ">"))
+  [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
+       (match_operand:W 1 "pop_operand" ">"))
    (clobber (mem:BLK (scratch)))]
   ""
   "pop{<imodesuffix>}\t%0"
@@ -15836,20 +15841,25 @@
   "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*rep_movdi_rex64"
-  [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
-   (set (match_operand:DI 0 "register_operand" "=D")
-        (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
-                           (const_int 3))
-                (match_operand:DI 3 "register_operand" "0")))
-   (set (match_operand:DI 1 "register_operand" "=S")
-        (plus:DI (ashift:DI (match_dup 5) (const_int 3))
-                (match_operand:DI 4 "register_operand" "1")))
+  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
+                         (const_int 3))
+                (match_operand:P 3 "register_operand" "0")))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (ashift:P (match_dup 5) (const_int 3))
+               (match_operand:P 4 "register_operand" "1")))
    (set (mem:BLK (match_dup 3))
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "TARGET_64BIT
    && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
-  "rep{%;} movsq"
+{
+  if (Pmode == SImode)
+    return "addr32{%;} rep{%;} movsq";
+  else
+    return "rep{%;} movsq";
+}
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
@@ -15868,7 +15878,12 @@
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
-  "rep{%;} movs{l|d}"
+{
+  if (word_mode == DImode && Pmode == SImode)
+    return "addr32{%;} rep{%;} movs{l|d}";
+  else
+    return "rep{%;} movs{l|d}";
+}
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
@@ -15885,7 +15900,12 @@
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
-  "rep{%;} movsb"
+{
+  if (word_mode == DImode && Pmode == SImode)
+    return "addr32{%;} rep{%;} movsb";
+  else
+    return "rep{%;} movsb";
+}
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
@@ -16010,18 +16030,23 @@
   "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*rep_stosdi_rex64"
-  [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
-   (set (match_operand:DI 0 "register_operand" "=D")
-        (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
-                           (const_int 3))
-                (match_operand:DI 3 "register_operand" "0")))
+  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
+                         (const_int 3))
+                (match_operand:P 3 "register_operand" "0")))
    (set (mem:BLK (match_dup 3))
        (const_int 0))
    (use (match_operand:DI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "TARGET_64BIT
    && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
-  "rep{%;} stosq"
+{
+  if (Pmode == SImode)
+    return "addr32{%;} rep{%;} stosq";
+  else
+    return "rep{%;} stosq";
+}
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
@@ -16038,7 +16063,12 @@
    (use (match_operand:SI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
-  "rep{%;} stos{l|d}"
+{
+  if (word_mode == DImode && Pmode == SImode)
+    return "addr32{%;} rep{%;} stos{l|d}";
+  else
+    return "rep{%;} stos{l|d}";
+}
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
@@ -16054,7 +16084,12 @@
    (use (match_operand:QI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
-  "rep{%;} stosb"
+{
+  if (word_mode == DImode && Pmode == SImode)
+    return "addr32{%;} rep{%;} stosb";
+  else
+    return "rep{%;} stosb";
+}
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
@@ -16175,7 +16210,12 @@
    (clobber (match_operand:P 1 "register_operand" "=D"))
    (clobber (match_operand:P 2 "register_operand" "=c"))]
   "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
-  "repz{%;} cmpsb"
+{
+  if (word_mode == DImode && Pmode == SImode)
+    return "addr32{%;} repz{%;} cmpsb";
+  else
+    return "repz{%;} cmpsb";
+}
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set (attr "prefix_rex")
@@ -16215,7 +16255,12 @@
    (clobber (match_operand:P 1 "register_operand" "=D"))
    (clobber (match_operand:P 2 "register_operand" "=c"))]
   "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
-  "repz{%;} cmpsb"
+{
+  if (word_mode == DImode && Pmode == SImode)
+    return "addr32{%;} repz{%;} cmpsb";
+  else
+    return "repz{%;} cmpsb";
+}
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set (attr "prefix_rex")
@@ -16256,7 +16301,12 @@
    (clobber (match_operand:P 1 "register_operand" "=D"))
    (clobber (reg:CC FLAGS_REG))]
   "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
-  "repnz{%;} scasb"
+{
+  if (word_mode == DImode && Pmode == SImode)
+    return "addr32{%;} repnz{%;} scasb";
+  else
+    return "repnz{%;} scasb";
+}
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set (attr "prefix_rex")
@@ -17391,131 +17441,131 @@
 ;; alternative when no register is available later.
 
 (define_peephole2
-  [(match_scratch:P 1 "r")
+  [(match_scratch:W 1 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
   "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
-   && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)"
+   && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)"
   [(clobber (match_dup 1))
-   (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+   (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
              (clobber (mem:BLK (scratch)))])])
 
 (define_peephole2
-  [(match_scratch:P 1 "r")
+  [(match_scratch:W 1 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
   "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
-   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)"
+   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)"
   [(clobber (match_dup 1))
-   (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
-   (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+   (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
              (clobber (mem:BLK (scratch)))])])
 
 ;; Convert esp subtractions to push.
 (define_peephole2
-  [(match_scratch:P 1 "r")
+  [(match_scratch:W 1 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
   "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
-   && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)"
+   && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)"
   [(clobber (match_dup 1))
-   (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
+   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
 
 (define_peephole2
-  [(match_scratch:P 1 "r")
+  [(match_scratch:W 1 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
   "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
-   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)"
+   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)"
   [(clobber (match_dup 1))
-   (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
-   (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
+   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
 
 ;; Convert epilogue deallocator to pop.
 (define_peephole2
-  [(match_scratch:P 1 "r")
+  [(match_scratch:W 1 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
   "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
-   && INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)"
-  [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+   && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
+  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
              (clobber (mem:BLK (scratch)))])])
 
 ;; Two pops case is tricky, since pop causes dependency
 ;; on destination register.  We use two registers if available.
 (define_peephole2
-  [(match_scratch:P 1 "r")
-   (match_scratch:P 2 "r")
+  [(match_scratch:W 1 "r")
+   (match_scratch:W 2 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
   "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
-   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
-  [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
+  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
              (clobber (mem:BLK (scratch)))])
-   (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))])
+   (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
 
 (define_peephole2
-  [(match_scratch:P 1 "r")
+  [(match_scratch:W 1 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
   "optimize_insn_for_size_p ()
-   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
-  [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
+  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
              (clobber (mem:BLK (scratch)))])
-   (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
+   (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
 
 ;; Convert esp additions to pop.
 (define_peephole2
-  [(match_scratch:P 1 "r")
+  [(match_scratch:W 1 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
-  "INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)"
-  [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
+  "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
+  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
 
 ;; Two pops case is tricky, since pop causes dependency
 ;; on destination register.  We use two registers if available.
 (define_peephole2
-  [(match_scratch:P 1 "r")
-   (match_scratch:P 2 "r")
+  [(match_scratch:W 1 "r")
+   (match_scratch:W 2 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
-  "INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
-  [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
-   (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))])
+  "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
+  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
+   (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
 
 (define_peephole2
-  [(match_scratch:P 1 "r")
+  [(match_scratch:W 1 "r")
    (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
   "optimize_insn_for_size_p ()
-   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
-  [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
-   (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
+  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
+   (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
 
 ;; Convert compares with 1 to shorter inc/dec operations when CF is not
 ;; required and register dies.  Similarly for 128 to -128.
@@ -17626,7 +17676,7 @@
 ;;  leal    (%edx,%eax,4), %eax
 
 (define_peephole2
-  [(match_scratch:P 5 "r")
+  [(match_scratch:W 5 "r")
    (parallel [(set (match_operand 0 "register_operand" "")
                   (ashift (match_operand 1 "register_operand" "")
                           (match_operand 2 "const_int_operand" "")))
@@ -17652,16 +17702,16 @@
   enum machine_mode op1mode = GET_MODE (operands[1]);
   enum machine_mode mode = op1mode == DImode ? DImode : SImode;
   int scale = 1 << INTVAL (operands[2]);
-  rtx index = gen_lowpart (Pmode, operands[1]);
-  rtx base = gen_lowpart (Pmode, operands[5]);
+  rtx index = gen_lowpart (word_mode, operands[1]);
+  rtx base = gen_lowpart (word_mode, operands[5]);
   rtx dest = gen_lowpart (mode, operands[3]);
 
-  operands[1] = gen_rtx_PLUS (Pmode, base,
-                             gen_rtx_MULT (Pmode, index, GEN_INT (scale)));
+  operands[1] = gen_rtx_PLUS (word_mode, base,
+                             gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
   operands[5] = base;
-  if (mode != Pmode)
+  if (mode != word_mode)
     operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
-  if (op1mode != Pmode)
+  if (op1mode != word_mode)
     operands[5] = gen_rtx_SUBREG (op1mode, operands[5], 0);
   operands[0] = dest;
 })
@@ -18052,7 +18102,7 @@
 {
   rtx (*insn)(rtx);
 
-  insn = (TARGET_64BIT
+  insn = (Pmode == DImode
          ? gen_lwp_slwpcbdi
          : gen_lwp_slwpcbsi);
 

Reply via email to