You are better off CCing the maintainers for such reviews. Let me do that for you. I cannot approve or reject this patch but I have a few comments as below.
On 10/29/13 09:22, Hurugalawadi, Naveen wrote:
diff -uprN '-x*.orig' mainline-orig/gcc/config/aarch64/aarch64.md gcc-4.8.0/gcc/config/aarch64/aarch64.md --- mainline-orig/gcc/config/aarch64/aarch64.md 2013-10-28 17:15:52.363975264 +0530 +++ gcc-4.8.0/gcc/config/aarch64/aarch64.md 2013-10-29 10:11:09.656082201 +0530 @@ -1068,6 +1068,27 @@ (set_attr "mode" "<MODE>")] ) +(define_peephole2 + [(set (match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_mem_pair_operand")) + (set (match_operand:GPI 2 "register_operand") + (match_operand:GPI 3 "memory_operand"))] + "GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG
Use the predicates REG_P , CONST_INT_P and so on. Don't do this in this form.
+ && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT + && GET_MODE (operands[0]) == GET_MODE (XEXP (XEXP (operands[1], 0), 0)) + && REGNO (operands[0]) != REGNO (operands[2]) + && REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[2])) + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, XEXP (operands[1], 0), + GET_MODE_SIZE (<MODE>mode))) + && optimize_size"
Why use optimize_size here ? I would have thought it was always good to generate ldp and stp instructions. If you want to make it a core specific decision make it a tuning decision but don't disable just based on size.
Additionally if this check is common between all the patterns it would be better to factor this out into a predicate or a function in its own right.
+ [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +) + ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. (define_insn "store_pair<mode>" @@ -1085,6 +1106,27 @@ (set_attr "mode" "<MODE>")] ) +(define_peephole2 + [(set (match_operand:GPI 0 "aarch64_mem_pair_operand") + (match_operand:GPI 1 "register_operand")) + (set (match_operand:GPI 2 "memory_operand") + (match_operand:GPI 3 "register_operand"))] + "GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT + && GET_MODE (operands[1]) == GET_MODE (XEXP (XEXP (operands[0], 0), 0)) + && REGNO (operands[1]) != REGNO (operands[3]) + && REGNO_REG_CLASS (REGNO (operands[1])) + == REGNO_REG_CLASS (REGNO (operands[3])) + && rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, XEXP (operands[0], 0), + GET_MODE_SIZE (<MODE>mode))) + && optimize_size" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +) + ;; Operands 1 and 3 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. (define_insn "load_pair<mode>" @@ -1102,6 +1144,28 @@ (set_attr "mode" "<MODE>")] ) +(define_peephole2 + [(set (match_operand:GPF 0 "register_operand") + (match_operand:GPF 1 "aarch64_mem_pair_operand")) + (set (match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "memory_operand"))] + "GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG + && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT + && GET_MODE (operands[0]) == GET_MODE (XEXP (XEXP (operands[1], 0), 0)) + && REGNO (operands[0]) != REGNO (operands[2]) + && REGNO (operands[0]) >= 32 && REGNO (operands[2]) >= 32
I'm not sure what this means here but this check doesn't look right - 32 is V0_REGNUM - why are you checking that here ? If you really need to do that check then use V0_REGNUM or it may be better to see if this really is a SIMD regnum ? Can you not use something like FP_REGNUM_P here instead ?
+ && REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[2])) + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, XEXP (operands[1], 0), + GET_MODE_SIZE (<MODE>mode))) + && optimize_size" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +) + ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. (define_insn "store_pair<mode>" @@ -1119,6 +1183,28 @@ (set_attr "mode" "<MODE>")] ) +(define_peephole2 + [(set (match_operand:GPF 0 "aarch64_mem_pair_operand") + (match_operand:GPF 1 "register_operand")) + (set (match_operand:GPF 2 "memory_operand") + (match_operand:GPF 3 "register_operand"))] + "GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT + && GET_MODE (operands[1]) == GET_MODE (XEXP (XEXP (operands[0], 0), 0)) + && REGNO (operands[1]) != REGNO (operands[3]) + && REGNO (operands[1]) >= 32 && REGNO (operands[3]) >= 32 + && REGNO_REG_CLASS (REGNO (operands[1])) + == REGNO_REG_CLASS (REGNO (operands[3])) + && rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, XEXP (operands[0], 0), + GET_MODE_SIZE (<MODE>mode))) + && optimize_size" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))])] +) + ;; Load pair with writeback. This is primarily used in function epilogues ;; when restoring [fp,lr] (define_insn "loadwb_pair<GPI:mode>_<P:mode>" diff -uprN '-x*.orig' mainline-orig/gcc/testsuite/gcc.target/aarch64/ldp-stp.c gcc-4.8.0/gcc/testsuite/gcc.target/aarch64/ldp-stp.c --- mainline-orig/gcc/testsuite/gcc.target/aarch64/ldp-stp.c 1970-01-01 05:30:00.000000000 +0530 +++ gcc-4.8.0/gcc/testsuite/gcc.target/aarch64/ldp-stp.c 2013-10-28 19:01:11.695986357 +0530 @@ -0,0 +1,33 @@ +/* { dg-options "-Os" } */ + +extern void abort (void); + +typedef struct +{ + long int x, y; +} ldst; + +void +f (ldst p0, ldst p1, ldst p2, ldst p3, ldst p4, ldst p5) +{ + if (p2.x != 1 || p2.y != -1 + || p3.x != -1 || p3.y != 1 || p4.x != 0 || p4.y != -1) + abort (); +} + +void +foo () +{ + ldst p0, p1, p2, p3, p4, p5; + + p4.x = 0; + p4.y = -1; + + p5.x = 1; + p5.y = 0; + + f (p0, p1, p2, p3, p4, p5); +} + +/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]" 3 } } */