Hi Guys, I am applying the patch below to sync the RX backend with the 4.6 branch. In practice this means bringing in the peepholes to combine extending loads and simple arithmetic expressions, and adjusting the memory move cost for stores. (The insn length attribute is needed for the rx_max_skip_for_label function).
Cheers Nick gcc/ChangeLog 2011-03-30 Nick Clifton <ni...@redhat.com> * config/rx/rx.md: Add peepholes and patterns to combine extending loads and simple arithmetic instructions. * config/rx/rx.h (ADJUST_INSN_LENGTH): Define. * config/rx/rx-protos.h (rx_adjust_insn_length): Prototype. * config/rx/rx.c (rx_is_legitimate_address): Allow QI and HI modes to use pre-decrement and post-increment addressing. (rx_is_restricted_memory_address): Add range checking of REG+INT addresses. (rx_print_operand): Add support for %R. Fix handling of %Q. (rx_memory_move_cost): Adjust cost of stores. (rx_adjust_insn_length): New function. Index: gcc/config/rx/rx.h =================================================================== --- gcc/config/rx/rx.h (revision 171716) +++ gcc/config/rx/rx.h (working copy) @@ -630,3 +630,10 @@ #define REGISTER_MOVE_COST(MODE,FROM,TO) 2 #define SELECT_CC_MODE(OP,X,Y) rx_select_cc_mode(OP, X, Y) + +#define ADJUST_INSN_LENGTH(INSN,LENGTH) \ + do \ + { \ + (LENGTH) = rx_adjust_insn_length ((INSN), (LENGTH)); \ + } \ + while (0) Index: gcc/config/rx/rx-protos.h =================================================================== --- gcc/config/rx/rx-protos.h (revision 171716) +++ gcc/config/rx/rx-protos.h (working copy) @@ -31,16 +31,17 @@ extern int rx_initial_elimination_offset (int, int); #ifdef RTX_CODE +extern int rx_adjust_insn_length (rtx, int); extern void rx_emit_stack_popm (rtx *, bool); extern void rx_emit_stack_pushm (rtx *); extern void rx_expand_epilogue (bool); extern char * rx_gen_move_template (rtx *, bool); extern bool rx_is_legitimate_constant (rtx); extern bool rx_is_restricted_memory_address (rtx, Mmode); +extern bool rx_match_ccmode (rtx, Mmode); extern void rx_notice_update_cc (rtx body, rtx insn); extern void rx_split_cbranch (Mmode, Rcode, rtx, rtx, rtx); extern Mmode rx_select_cc_mode (Rcode, rtx, rtx); -extern bool rx_match_ccmode (rtx, Mmode); #endif #endif /* GCC_RX_PROTOS_H */ Index: gcc/config/rx/rx.md 
=================================================================== --- gcc/config/rx/rx.md (revision 171716) +++ gcc/config/rx/rx.md (working copy) @@ -1545,6 +1545,139 @@ (set_attr "length" "3,4,5,6,7,6")] ) +;; A set of peepholes to catch extending loads followed by arithmetic operations. +;; We use iterators where possible to reduce the amount of typing and hence the +;; possibilities for typos. + +(define_code_iterator extend_types [(zero_extend "") (sign_extend "")]) +(define_code_attr letter [(zero_extend "R") (sign_extend "Q")]) + +(define_code_iterator memex_commutative [(plus "") (and "") (ior "") (xor "")]) +(define_code_iterator memex_noncomm [(div "") (udiv "") (minus "")]) +(define_code_iterator memex_nocc [(smax "") (smin "") (mult "")]) + +(define_code_attr op [(plus "add") (and "and") (div "div") (udiv "divu") (smax "max") (smin "min") (mult "mul") (ior "or") (minus "sub") (xor "xor")]) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand"))) + (parallel [(set (match_operand:SI 2 "register_operand") + (memex_commutative:SI (match_dup 0) + (match_dup 2))) + (clobber (reg:CC CC_REG))])] + "peep2_regno_dead_p (2, REGNO (operands[0]))" + [(parallel [(set:SI (match_dup 2) + (memex_commutative:SI (match_dup 2) + (extend_types:SI (match_dup 1)))) + (clobber (reg:CC CC_REG))])] +) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand"))) + (parallel [(set (match_operand:SI 2 "register_operand") + (memex_commutative:SI (match_dup 2) + (match_dup 0))) + (clobber (reg:CC CC_REG))])] + "peep2_regno_dead_p (2, REGNO (operands[0]))" + [(parallel [(set:SI (match_dup 2) + (memex_commutative:SI (match_dup 2) + (extend_types:SI (match_dup 1)))) + (clobber (reg:CC CC_REG))])] +) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (extend_types:SI 
(match_operand:small_int_modes 1 "rx_restricted_mem_operand"))) + (parallel [(set (match_operand:SI 2 "register_operand") + (memex_noncomm:SI (match_dup 2) + (match_dup 0))) + (clobber (reg:CC CC_REG))])] + "peep2_regno_dead_p (2, REGNO (operands[0]))" + [(parallel [(set:SI (match_dup 2) + (memex_noncomm:SI (match_dup 2) + (extend_types:SI (match_dup 1)))) + (clobber (reg:CC CC_REG))])] +) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand"))) + (set (match_operand:SI 2 "register_operand") + (memex_nocc:SI (match_dup 0) + (match_dup 2)))] + "peep2_regno_dead_p (2, REGNO (operands[0]))" + [(set:SI (match_dup 2) + (memex_nocc:SI (match_dup 2) + (extend_types:SI (match_dup 1))))] +) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand"))) + (set (match_operand:SI 2 "register_operand") + (memex_nocc:SI (match_dup 2) + (match_dup 0)))] + "peep2_regno_dead_p (2, REGNO (operands[0]))" + [(set:SI (match_dup 2) + (memex_nocc:SI (match_dup 2) + (extend_types:SI (match_dup 1))))] +) + +(define_insn "<memex_commutative:code>si3_<extend_types:code><small_int_modes:mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (memex_commutative:SI (match_operand:SI 1 "register_operand" "%0") + (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q")))) + (clobber (reg:CC CC_REG))] + "" + "<memex_commutative:op>\t%<extend_types:letter>2, %0" + [(set_attr "timings" "33") + (set_attr "length" "5")] ;; This length is corrected in rx_adjust_insn_length +) + +(define_insn "<memex_noncomm:code>si3_<extend_types:code><small_int_modes:mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (memex_noncomm:SI (match_operand:SI 1 "register_operand" "0") + (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q")))) + (clobber (reg:CC CC_REG))] + 
"" + "<memex_noncomm:op>\t%<extend_types:letter>2, %0" + [(set_attr "timings" "33") + (set_attr "length" "5")] ;; This length is corrected in rx_adjust_insn_length +) + +(define_insn "<memex_nocc:code>si3_<extend_types:code><small_int_modes:mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (memex_nocc:SI (match_operand:SI 1 "register_operand" "%0") + (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q"))))] + "" + "<memex_nocc:op>\t%<extend_types:letter>2, %0" + [(set_attr "timings" "33") + (set_attr "length" "5")] ;; This length is corrected in rx_adjust_insn_length +) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand"))) + (set (reg:CC CC_REG) + (compare:CC (match_operand:SI 2 "register_operand") + (match_dup 0)))] + "peep2_regno_dead_p (2, REGNO (operands[0]))" + [(set (reg:CC CC_REG) + (compare:CC (match_dup 2) + (extend_types:SI (match_dup 1))))] +) + +(define_insn "comparesi3_<extend_types:code><small_int_modes:mode>" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 0 "register_operand" "=r") + (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand" "Q"))))] + "" + "cmp\t%<extend_types:letter>1, %0" + [(set_attr "timings" "33") + (set_attr "length" "5")] ;; This length is corrected in rx_adjust_insn_length +) + ;; Floating Point Instructions (define_insn "addsf3" Index: gcc/config/rx/rx.c =================================================================== --- gcc/config/rx/rx.c (revision 171716) +++ gcc/config/rx/rx.c (working copy) @@ -86,7 +86,7 @@ /* Register Indirect. */ return true; - if (GET_MODE_SIZE (mode) == 4 + if (GET_MODE_SIZE (mode) <= 4 && (GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC)) /* Pre-decrement Register Indirect or Post-increment Register Indirect. 
*/ @@ -117,7 +117,7 @@ if (val < 0) return false; - + switch (GET_MODE_SIZE (mode)) { default: @@ -126,7 +126,7 @@ case 1: factor = 1; break; } - if (val > (65535 * factor)) + if (val >= (0x10000 * factor)) return false; return (val % factor) == 0; } @@ -167,8 +167,6 @@ bool rx_is_restricted_memory_address (rtx mem, enum machine_mode mode) { - rtx base, index; - if (! rx_is_legitimate_address (mode, mem, reload_in_progress || reload_completed)) return false; @@ -184,12 +182,19 @@ return false; case PLUS: - /* Only allow REG+INT addressing. */ - base = XEXP (mem, 0); - index = XEXP (mem, 1); + { + rtx base, index; + + /* Only allow REG+INT addressing. */ + base = XEXP (mem, 0); + index = XEXP (mem, 1); - return RX_REG_P (base) && CONST_INT_P (index); + if (! RX_REG_P (base) || ! CONST_INT_P (index)) + return false; + return IN_RANGE (INTVAL (index), 0, (0x10000 * GET_MODE_SIZE (mode)) - 1); + } + case SYMBOL_REF: /* Can happen when small data is being supported. Assume that it will be resolved into GP+INT. */ @@ -387,11 +392,14 @@ %L Print low part of a DImode register, integer or address. %N Print the negation of the immediate value. %Q If the operand is a MEM, then correctly generate - register indirect or register relative addressing. */ + register indirect or register relative addressing. + %R Like %Q but for zero-extending loads. */ static void rx_print_operand (FILE * file, rtx op, int letter) { + bool unsigned_load = false; + switch (letter) { case 'A': @@ -451,6 +459,7 @@ else { unsigned int flags = flags_from_mode (mode); + switch (code) { case LT: @@ -589,10 +598,15 @@ rx_print_integer (file, - INTVAL (op)); break; + case 'R': + gcc_assert (GET_MODE_SIZE (GET_MODE (op)) < 4); + unsigned_load = true; + /* Fall through. 
*/ case 'Q': if (MEM_P (op)) { HOST_WIDE_INT offset; + rtx mem = op; op = XEXP (op, 0); @@ -627,22 +641,24 @@ rx_print_operand (file, op, 0); fprintf (file, "]."); - switch (GET_MODE_SIZE (GET_MODE (op))) + switch (GET_MODE_SIZE (GET_MODE (mem))) { case 1: - gcc_assert (offset < 65535 * 1); - fprintf (file, "B"); + gcc_assert (offset <= 65535 * 1); + fprintf (file, unsigned_load ? "UB" : "B"); break; case 2: gcc_assert (offset % 2 == 0); - gcc_assert (offset < 65535 * 2); - fprintf (file, "W"); + gcc_assert (offset <= 65535 * 2); + fprintf (file, unsigned_load ? "UW" : "W"); break; - default: + case 4: gcc_assert (offset % 4 == 0); - gcc_assert (offset < 65535 * 4); + gcc_assert (offset <= 65535 * 4); fprintf (file, "L"); break; + default: + gcc_unreachable (); } break; } @@ -2449,8 +2465,7 @@ default: /* FIXME: Can this ever happen ? */ - abort (); - return false; + gcc_unreachable (); } break; @@ -2593,7 +2608,7 @@ static int rx_memory_move_cost (enum machine_mode mode, reg_class_t regclass, bool in) { - return 2 + memory_move_secondary_cost (mode, regclass, in); + return (in ? 2 : 0) + memory_move_secondary_cost (mode, regclass, in); } /* Convert a CC_MODE to the set of flags that it represents. */ @@ -2778,6 +2793,113 @@ return opsize - 1; return 0; } + +/* Compute the real length of the extending load-and-op instructions. 
*/ + +int +rx_adjust_insn_length (rtx insn, int current_length) +{ + rtx extend, mem, offset; + bool zero; + int factor; + + switch (INSN_CODE (insn)) + { + default: + return current_length; + + case CODE_FOR_plussi3_zero_extendhi: + case CODE_FOR_andsi3_zero_extendhi: + case CODE_FOR_iorsi3_zero_extendhi: + case CODE_FOR_xorsi3_zero_extendhi: + case CODE_FOR_divsi3_zero_extendhi: + case CODE_FOR_udivsi3_zero_extendhi: + case CODE_FOR_minussi3_zero_extendhi: + case CODE_FOR_smaxsi3_zero_extendhi: + case CODE_FOR_sminsi3_zero_extendhi: + case CODE_FOR_multsi3_zero_extendhi: + case CODE_FOR_comparesi3_zero_extendqi: + zero = true; + factor = 2; + break; + + case CODE_FOR_plussi3_sign_extendhi: + case CODE_FOR_andsi3_sign_extendhi: + case CODE_FOR_iorsi3_sign_extendhi: + case CODE_FOR_xorsi3_sign_extendhi: + case CODE_FOR_divsi3_sign_extendhi: + case CODE_FOR_udivsi3_sign_extendhi: + case CODE_FOR_minussi3_sign_extendhi: + case CODE_FOR_smaxsi3_sign_extendhi: + case CODE_FOR_sminsi3_sign_extendhi: + case CODE_FOR_multsi3_sign_extendhi: + case CODE_FOR_comparesi3_zero_extendhi: + zero = false; + factor = 2; + break; + + case CODE_FOR_plussi3_zero_extendqi: + case CODE_FOR_andsi3_zero_extendqi: + case CODE_FOR_iorsi3_zero_extendqi: + case CODE_FOR_xorsi3_zero_extendqi: + case CODE_FOR_divsi3_zero_extendqi: + case CODE_FOR_udivsi3_zero_extendqi: + case CODE_FOR_minussi3_zero_extendqi: + case CODE_FOR_smaxsi3_zero_extendqi: + case CODE_FOR_sminsi3_zero_extendqi: + case CODE_FOR_multsi3_zero_extendqi: + case CODE_FOR_comparesi3_sign_extendqi: + zero = true; + factor = 1; + break; + + case CODE_FOR_plussi3_sign_extendqi: + case CODE_FOR_andsi3_sign_extendqi: + case CODE_FOR_iorsi3_sign_extendqi: + case CODE_FOR_xorsi3_sign_extendqi: + case CODE_FOR_divsi3_sign_extendqi: + case CODE_FOR_udivsi3_sign_extendqi: + case CODE_FOR_minussi3_sign_extendqi: + case CODE_FOR_smaxsi3_sign_extendqi: + case CODE_FOR_sminsi3_sign_extendqi: + case CODE_FOR_multsi3_sign_extendqi: + case 
CODE_FOR_comparesi3_sign_extendhi: + zero = false; + factor = 1; + break; + } + + /* We are expecting: (SET (REG) (<OP> (REG) (<EXTEND> (MEM)))). */ + extend = single_set (insn); + gcc_assert (extend != NULL_RTX); + + extend = SET_SRC (extend); + if (GET_CODE (XEXP (extend, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (extend, 0)) == SIGN_EXTEND) + extend = XEXP (extend, 0); + else + extend = XEXP (extend, 1); + + gcc_assert ((zero && (GET_CODE (extend) == ZERO_EXTEND)) + || (! zero && (GET_CODE (extend) == SIGN_EXTEND))); + + mem = XEXP (extend, 0); + gcc_checking_assert (MEM_P (mem)); + if (REG_P (XEXP (mem, 0))) + return (zero && factor == 1) ? 2 : 3; + + /* We are expecting: (MEM (PLUS (REG) (CONST_INT))). */ + gcc_checking_assert (GET_CODE (XEXP (mem, 0)) == PLUS); + gcc_checking_assert (REG_P (XEXP (XEXP (mem, 0), 0))); + + offset = XEXP (XEXP (mem, 0), 1); + gcc_checking_assert (GET_CODE (offset) == CONST_INT); + + if (IN_RANGE (INTVAL (offset), 0, 255 * factor)) + return (zero && factor == 1) ? 3 : 4; + + return (zero && factor == 1) ? 4 : 5; +} #undef TARGET_ASM_JUMP_ALIGN_MAX_SKIP #define TARGET_ASM_JUMP_ALIGN_MAX_SKIP rx_max_skip_for_label