On Fri, Jun 02, 2006 at 09:24:17AM +0200, Rask Ingemann Lambertsen wrote: > The rest of the ARM backend presently assumes that the pattern has the form > > (set (operand:QI 0) (operand:QI 1)) > > but now we've changed it to > > (parallel [(set (operand:QI 0) (operand:QI 1)) > (clobber (operand:QI 2)) > ]) > > so that's why you get "unrecognizable insn" errors now. Any place which > intended to generate an *arm_movqi_insn has to add a clobber also. For a > start, this means the "movqi" pattern.
I've now implemented it. This brings a small improvement to the code generated for bytewritetest: bytewritetest: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 @ link register save eliminated. ldrb r3, [r0, #5] @ zero_extendqisi2 ldrb ip, [r0, #4] @ zero_extendqisi2 ldr r2, [r0, #0] add r1, r3, ip str r2, [r0, #8] str r1, [r0], #5 <-- eor r3, r3, ip swpb r2, r3, [r0] @ lr needed for prologue bx lr Exactly the same number of instructions as without -mswp-byte-writes because of postincrement. Basically, it pays off to get the insn expanded correctly to begin with, rather than leaving it to reload to fix it up later. This should work fine with volatile variables because there is no need to read back from memory. The peephole optimizations are gone for the same reason. I do wonder if the ability to reuse the input register as a scratch register has been preserved, though. Compiling unwind-dw2-fde.c, I noticed that the code produced for __register_frame_info_table_bases() differs more than expected: __register_frame_info_table_bases: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 1 stmfd sp!, {r4, lr} 2 mov lr, #0 3 str lr, [r1, #16] 4 ldrb ip, [r1, #16] @ zero_extendqisi2 5 orr ip, ip, #2 6 strb ip, [r1, #16] 7 ldr r4, .L28 8 ldrh ip, [r1, #16] 9 ldr lr, [r4, #0] 10 orr ip, ip, #2032 11 str r0, [r1, #12] 12 orr ip, ip, #8 13 mvn r0, #0 14 strh ip, [r1, #16] @ movhi 15 str lr, [r1, #20] 16 str r0, [r1, #0] 17 str r1, [r4, #0] 18 stmib r1, {r2, r3} @ phole stm 19 ldmfd sp!, {r4, pc} vs. __register_frame_info_table_bases: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 2 mov ip, #0 3 str ip, [r1, #16] 1 str lr, [sp, #-4]! 4 ldrb lr, [r1, #16] @ zero_extendqisi2 11 str r0, [r1, #12] 5 orr lr, lr, #2 13 mvn r0, #0 6a add ip, r1, #16 16+18? 
stmia r1, {r0, r2, r3} @ phole stm 6b swpb r3, lr, [ip] 7 ldr r0, .L28 8 ldrh r3, [r1, #16] 9 ldr r2, [r0, #0] 10 orr r3, r3, #2032 12 orr r3, r3, #8 14 strh r3, [r1, #16] @ movhi 15 str r2, [r1, #20] 17 str r1, [r0, #0] 19 ldr pc, [sp], #4 But the swp version seems to be equivalent, doesn't it? I'm not sure that the reload_outqi expander will correctly handle cases where reload spills a register to memory. If the memory address doesn't have the right form, it becomes more complicated. Index: gcc/config/arm/arm.h =================================================================== --- gcc/config/arm/arm.h (revision 114119) +++ gcc/config/arm/arm.h (working copy) @@ -1094,6 +1094,8 @@ ? vfp_secondary_reload_class (MODE, X) \ : TARGET_ARM \ ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \ + || ((MODE) == QImode && TARGET_ARM && TARGET_SWP_BYTE_WRITES \ + && true_regnum (X) == -1) \ ? GENERAL_REGS : NO_REGS) \ : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X)) Index: gcc/config/arm/arm.opt =================================================================== --- gcc/config/arm/arm.opt (revision 114119) +++ gcc/config/arm/arm.opt (working copy) @@ -153,3 +153,7 @@ mwords-little-endian Target Report RejectNegative Mask(LITTLE_WORDS) Assume big endian bytes, little endian words + +mswp-byte-writes +Target Report Mask(SWP_BYTE_WRITES) +Use the swp instruction for byte writes. The default is to use str Index: gcc/config/arm/predicates.md =================================================================== --- gcc/config/arm/predicates.md (revision 114119) +++ gcc/config/arm/predicates.md (working copy) @@ -125,6 +125,14 @@ || (GET_CODE (op) == REG && REGNO (op) >= FIRST_PSEUDO_REGISTER)))"))) +;; Match register operands or memory operands of the form (mem (reg ...)), +;; as permitted by the "Q" memory constraint. 
+(define_predicate "reg_or_Qmem_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "mem") + (match_code "reg" "0"))) +) + ;; True for valid operands for the rhs of an floating point insns. ;; Allows regs or certain consts on FPA, just regs for everything else. (define_predicate "arm_float_rhs_operand" Index: gcc/config/arm/arm.md =================================================================== --- gcc/config/arm/arm.md (revision 114119) +++ gcc/config/arm/arm.md (working copy) @@ -5151,6 +5151,16 @@ emit_insn (gen_movsi (operands[0], operands[1])); DONE; } + if (TARGET_ARM && TARGET_SWP_BYTE_WRITES) + { + /* Ensure that operands[0] is (mem (reg ...)) if a memory operand. */ + if (MEM_P (operands[0]) && !REG_P (XEXP (operands[0], 0))) + operands[0] + = replace_equiv_address (operands[0], + copy_to_reg (XEXP (operands[0], 0))); + emit_insn (gen__arm_movqi_insn_swp (operands[0], operands[1])); + DONE; + } " ) @@ -5158,7 +5168,7 @@ (define_insn "*arm_movqi_insn" [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") (match_operand:QI 1 "general_operand" "rI,K,m,r"))] - "TARGET_ARM + "TARGET_ARM && !TARGET_SWP_BYTE_WRITES && ( register_operand (operands[0], QImode) || register_operand (operands[1], QImode))" "@ @@ -5170,6 +5180,31 @@ (set_attr "predicable" "yes")] ) +;; This is primarily a hack for the Nintendo DS external RAM. +(define_insn "_arm_movqi_insn_swp" + [(set (match_operand:QI 0 "reg_or_Qmem_operand" "=r,r,r,Q") + (match_operand:QI 1 "general_operand" "rI,K,m,r")) + (clobber (match_scratch:QI 2 "=X,X,X,r"))] + "TARGET_ARM && TARGET_SWP_BYTE_WRITES + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" + "@ + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + ldr%?b\\t%0, %1 + swp%?b\\t%2, %1, [%|%m0]" + [(set_attr "type" "*,*,load1,store1") + (set_attr "predicable" "yes")] +) + +;; The earlyclobber is required by default_secondary_reload() in targhooks.c. 
+(define_expand "reload_outqi" + [(set (match_operand:QI 0 "memory_operand" "=Q") + (match_operand:QI 1 "register_operand" "r")) + (clobber (match_operand:QI 2 "register_operand" "=&r"))] + "TARGET_ARM && TARGET_SWP_BYTE_WRITES" +) + (define_insn "*thumb_movqi_insn" [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l") (match_operand:QI 1 "general_operand" "l, m,l,*h,*r,I"))] -- Rask Ingemann Lambertsen