On Fri, Jun 02, 2006 at 09:24:17AM +0200, Rask Ingemann Lambertsen wrote:

> The rest of the ARM backend presently assumes that the pattern has the form
> 
> (set (operand:QI 0) (operand:QI 1))
> 
> but now we've changed it to
> 
> (parallel [(set (operand:QI 0) (operand:QI 1))
>          (clobber (operand:QI 2))
> ])
> 
> so that's why you get "unrecognizable insn" errors now. Any place which
> intended to generate an *arm_movqi_insn has to add a clobber also. For a
> start, this means the "movqi" pattern.

I've now implemented it. This brings a small improvement to the code
generated for bytewritetest:

bytewritetest:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        ldrb    r3, [r0, #5]    @ zero_extendqisi2
        ldrb    ip, [r0, #4]    @ zero_extendqisi2
        ldr     r2, [r0, #0]
        add     r1, r3, ip
        str     r2, [r0, #8]
        str     r1, [r0], #5    <--
        eor     r3, r3, ip
        swpb    r2, r3, [r0]
        @ lr needed for prologue
        bx      lr

Exactly the same number of instructions as without -mswp-byte-writes because
of postincrement. Basically, it pays off to get the insn expanded correctly to
begin with, rather than leaving it to reload to fix it up later. This should
work fine with volatile variables because there is no need to read back from
memory. The peephole optimizations are gone for the same reason. I do wonder
if the ability to reuse the input register as a scratch register has been
preserved, though.

Compiling unwind-dw2-fde.c, I noticed that the code produced for
__register_frame_info_table_bases() differs more than expected:

__register_frame_info_table_bases:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
 1      stmfd   sp!, {r4, lr}
 2      mov     lr, #0
 3      str     lr, [r1, #16]
 4      ldrb    ip, [r1, #16]   @ zero_extendqisi2
 5      orr     ip, ip, #2
 6      strb    ip, [r1, #16]
 7      ldr     r4, .L28
 8      ldrh    ip, [r1, #16]
 9      ldr     lr, [r4, #0]
10      orr     ip, ip, #2032
11      str     r0, [r1, #12]
12      orr     ip, ip, #8
13      mvn     r0, #0
14      strh    ip, [r1, #16]   @ movhi
15      str     lr, [r1, #20]
16      str     r0, [r1, #0]
17      str     r1, [r4, #0]
18      stmib   r1, {r2, r3}    @ phole stm
19      ldmfd   sp!, {r4, pc}

vs.

__register_frame_info_table_bases:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
 2      mov     ip, #0
 3      str     ip, [r1, #16]
 1      str     lr, [sp, #-4]!
 4      ldrb    lr, [r1, #16]   @ zero_extendqisi2
11      str     r0, [r1, #12]
 5      orr     lr, lr, #2
13      mvn     r0, #0
 6a     add     ip, r1, #16
16+18?  stmia   r1, {r0, r2, r3}        @ phole stm
 6b     swpb    r3, lr, [ip]
 7      ldr     r0, .L28
 8      ldrh    r3, [r1, #16]
 9      ldr     r2, [r0, #0]
10      orr     r3, r3, #2032
12      orr     r3, r3, #8
14      strh    r3, [r1, #16]   @ movhi
15      str     r2, [r1, #20]
17      str     r1, [r0, #0]
19      ldr     pc, [sp], #4

But the swp version seems to be equivalent, doesn't it?

I'm not sure that the reload_outqi expander will correctly handle
cases where reload spills a register to memory. If the memory address
doesn't have the right form, it becomes more complicated.

Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h        (revision 114119)
+++ gcc/config/arm/arm.h        (working copy)
@@ -1094,6 +1094,8 @@
    ? vfp_secondary_reload_class (MODE, X)                      \
    : TARGET_ARM                                                        \
    ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
+   || ((MODE) == QImode && TARGET_ARM && TARGET_SWP_BYTE_WRITES        \
+       && true_regnum (X) == -1)                               \
     ? GENERAL_REGS : NO_REGS)                                  \
    : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X))
 
Index: gcc/config/arm/arm.opt
===================================================================
--- gcc/config/arm/arm.opt      (revision 114119)
+++ gcc/config/arm/arm.opt      (working copy)
@@ -153,3 +153,7 @@
 mwords-little-endian
 Target Report RejectNegative Mask(LITTLE_WORDS)
 Assume big endian bytes, little endian words
+
+mswp-byte-writes
+Target Report Mask(SWP_BYTE_WRITES)
+Use the swp instruction for byte writes. The default is to use str
Index: gcc/config/arm/predicates.md
===================================================================
--- gcc/config/arm/predicates.md        (revision 114119)
+++ gcc/config/arm/predicates.md        (working copy)
@@ -125,6 +125,14 @@
                         || (GET_CODE (op) == REG
                             && REGNO (op) >= FIRST_PSEUDO_REGISTER)))")))
 
+;; Match register operands or memory operands of the form (mem (reg ...)),
+;; as permitted by the "Q" memory constraint.
+(define_predicate "reg_or_Qmem_operand"
+  (ior (match_operand 0 "register_operand")
+       (and (match_code "mem")
+           (match_code "reg" "0")))
+)
+
 ;; True for valid operands for the rhs of an floating point insns.
 ;;   Allows regs or certain consts on FPA, just regs for everything else.
 (define_predicate "arm_float_rhs_operand"
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md       (revision 114119)
+++ gcc/config/arm/arm.md       (working copy)
@@ -5151,6 +5151,16 @@
       emit_insn (gen_movsi (operands[0], operands[1]));
       DONE;
     }
+  if (TARGET_ARM && TARGET_SWP_BYTE_WRITES)
+    {
+      /* Ensure that operands[0] is (mem (reg ...)) if a memory operand. */
+      if (MEM_P (operands[0]) && !REG_P (XEXP (operands[0], 0)))
+           operands[0]
+             = replace_equiv_address (operands[0],
+                                      copy_to_reg (XEXP (operands[0], 0)));
+      emit_insn (gen__arm_movqi_insn_swp (operands[0], operands[1]));
+      DONE;
+    }
   "
 )
 
@@ -5158,7 +5168,7 @@
 (define_insn "*arm_movqi_insn"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
        (match_operand:QI 1 "general_operand" "rI,K,m,r"))]
-  "TARGET_ARM
+  "TARGET_ARM && !TARGET_SWP_BYTE_WRITES
    && (   register_operand (operands[0], QImode)
        || register_operand (operands[1], QImode))"
   "@
@@ -5170,6 +5180,31 @@
    (set_attr "predicable" "yes")]
 )
 
+;; This is primarily a hack for the Nintendo DS external RAM.
+(define_insn "_arm_movqi_insn_swp"
+  [(set (match_operand:QI 0 "reg_or_Qmem_operand" "=r,r,r,Q")
+       (match_operand:QI 1 "general_operand" "rI,K,m,r"))
+        (clobber (match_scratch:QI 2 "=X,X,X,r"))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES
+   && (   register_operand (operands[0], QImode)
+       || register_operand (operands[1], QImode))"
+  "@
+   mov%?\\t%0, %1
+   mvn%?\\t%0, #%B1
+   ldr%?b\\t%0, %1
+   swp%?b\\t%2, %1, [%|%m0]"
+  [(set_attr "type" "*,*,load1,store1")
+   (set_attr "predicable" "yes")]
+)
+
+;; The earlyclobber is required by default_secondary_reload() in targhooks.c.
+(define_expand "reload_outqi"
+  [(set (match_operand:QI 0 "memory_operand" "=Q")
+       (match_operand:QI 1 "register_operand" "r"))
+   (clobber (match_operand:QI 2 "register_operand" "=&r"))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES"
+)
+
 (define_insn "*thumb_movqi_insn"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l")
        (match_operand:QI 1 "general_operand"      "l, m,l,*h,*r,I"))]


-- 
Rask Ingemann Lambertsen

Reply via email to