On Mon, Jun 05, 2006 at 01:47:10PM +0200, Wolfgang Mües wrote:

> I don't know why the form "[%r, #0]" was coded before, because the 
> assembler understands "[%r]" very well for all instructions. The form 
> "[%r]" has a wider usage because it covers swp too.

Does GCC happen to accept "[%r, #0]" for swp?

> Also, I wonder what the "Q" constraint really means:
> 
> from the GCC manual:
> 
> > Q
> > A memory reference where the exact address is in a single register
> > (``m'' is preferable for asm statements)
> 
> but in arm.h:
> 
> > /* For the ARM, `Q' means that this is a memory operand that is just
> >    an offset from a register.
> > #define EXTRA_CONSTRAINT_STR_ARM(OP, C, STR)                        \
> >    ((C) == 'Q') ? (GET_CODE (OP) == MEM                            \
> >              && GET_CODE (XEXP (OP, 0)) == REG) :                   \

I think the comment in arm.h is wrong. The manual seems to agree with the
code.

> Obviously, GCC tries to implement REG+CONSTANT with Q.
> 
> Maybe I must define a new constraint?

I tried 'V' instead, but it looks as if reload completely ignores the
meaning of the constraint. There is already a comment in arm.md about that.
It should be investigated further.

Meanwhile, I changed arm_legitimate_address_p() to enforce the correct
address form. This hurts byte loads too, though.

Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c        (revision 114119)
+++ gcc/config/arm/arm.c        (working copy)
@@ -3509,6 +3509,9 @@
   if (arm_address_register_rtx_p (x, strict_p))
     return 1;
 
+  if (TARGET_ARM && TARGET_SWP_BYTE_WRITES && mode == QImode && outer == SET)
+    return 0;
+
   use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

Index: gcc/config/arm/arm.opt
===================================================================
--- gcc/config/arm/arm.opt      (revision 114119)
+++ gcc/config/arm/arm.opt      (working copy)
@@ -153,3 +153,7 @@
 mwords-little-endian
 Target Report RejectNegative Mask(LITTLE_WORDS)
 Assume big endian bytes, little endian words
+
+mswp-byte-writes
+Target Report Mask(SWP_BYTE_WRITES)
+Use the swp instruction for byte writes

Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md       (revision 114119)
+++ gcc/config/arm/arm.md       (working copy)
@@ -5158,7 +5158,7 @@
 (define_insn "*arm_movqi_insn"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
        (match_operand:QI 1 "general_operand" "rI,K,m,r"))]
-  "TARGET_ARM
+  "TARGET_ARM && !TARGET_SWP_BYTE_WRITES
    && (   register_operand (operands[0], QImode)
        || register_operand (operands[1], QImode))"
   "@
@@ -5170,6 +5170,44 @@
    (set_attr "predicable" "yes")]
 )
 
+; This is for the Nintendo DS external RAM.
+(define_insn "*arm_movqi_insn_swp"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,Q")
+       (match_operand:QI 1 "general_operand" "rI,K,m,r"))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES
+   && (   register_operand (operands[0], QImode)
+       || register_operand (operands[1], QImode))"
+  "@
+   mov%?\\t%0, %1
+   mvn%?\\t%0, #%B1
+   ldr%?b\\t%0, %1
+   swp%?b\\t%1, %1, %0\;ldr%?b\\t%1, %0"
+  [(set_attr "type" "*,*,load1,store1")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_movqi_insn_swp_clobber"
+  [(set (match_operand:QI 0 "memory_operand" "=Q")
+        (match_operand:QI 1 "register_operand" "r"))
+   (clobber (match_operand:QI 2 "register_operand" "=r"))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES"
+  "swp%?b\\t%2, %1, %0"
+  [(set_attr "type" "store1")
+   (set_attr "predicable" "yes")]
+)
+
+; Avoid reading the stored value back if we have a spare register.
+(define_peephole2
+  [(match_scratch:QI 2 "r")
+   (set (match_operand:QI 0 "memory_operand" "")
+        (match_operand:QI 1 "register_operand" ""))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES"
+  [(parallel [
+    (set (match_dup 0) (match_dup 1))
+    (clobber (match_dup 2))]
+  )]
+)
+
 (define_insn "*thumb_movqi_insn"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l")
        (match_operand:QI 1 "general_operand"      "l, m,l,*h,*r,I"))]

This seems to work as intended on a small test case:

/* Test fixture: two adjacent char members placed between two int members,
   so the test function below touches both int and char fields of one
   object.  */
struct foobar
{
        int  i1;
        char c1;        /* only read by bytewritetest() */
        char c2;        /* read and written by bytewritetest() */
        int  i2;
};

/* Small test case for byte writes: mixes int stores (i2, i1) with a single
   char store (c2) through the same pointer X.  */
void bytewritetest (struct foobar *x)
{
        x->i2 = x->i1;          /* int load, int store */
        x->i1 = x->c1 + x->c2;  /* two char loads, int store */
        x->c2 ^= x->c1;         /* char read-modify-write: the byte store */
}

With just -O2:

bytewritetest:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        ldrb    r3, [r0, #5]    @ zero_extendqisi2
        ldrb    r2, [r0, #4]    @ zero_extendqisi2
        ldr     ip, [r0, #0]
        eor     r1, r3, r2
        add     r3, r3, r2
        @ lr needed for prologue
        strb    r1, [r0, #5]
        str     ip, [r0, #8]
        str     r3, [r0, #0]
        bx      lr

With -O2 -mswp-byte-writes:

bytewritetest:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        str     lr, [sp, #-4]!
        add     r2, r0, #4
        add     lr, r0, #5
        ldrb    r3, [lr, #0]    @ zero_extendqisi2
        ldrb    r1, [r2, #0]    @ zero_extendqisi2
        eor     r2, r1, r3
        add     r3, r3, r1
        ldr     ip, [r0, #0]
        str     r3, [r0, #0]
        swpb    r3, r2, [lr, #0]
        str     ip, [r0, #8]
        ldr     pc, [sp], #4


The register allocator chooses to use the lr register, in turn causing link
register save elimination to fail, which doesn't help.

-- 
Rask Ingemann Lambertsen

Reply via email to