This adds a simulated conditional execution option to the RX backend. The RX can execute a short conditional forward branch faster than a regular conditional branch (1 byte, 1 cycle!), as long as it branches across only one or two insns. This patch encourages that behavior via the condexec support and gives a 1% performance improvement. The patch is against 4.6 but also applies to trunk. OK?
* config/rx/rx.opt (mcondexec): New. * config/rx/rx-protos.h (rx_condexec_prescan): Declare. * config/rx/rx.md (predicable): New. (define_cond_exec): New. (define_asm_attributes): New. (pop_and_return): Manually call rx_condexec_prescan(). (*mov<register_modes:mode>_internal): Likewise. (extend<small_int_modes:mode>si2): Likewise. (zero_extend<small_int_modes:mode>si2): Likewise. (stack_pushm): Likewise. (stack_popm): Likewise. (*stcc): Likewise. (*insv_imm): Likewise. * config/rx/rx.c (TARGET_DEFAULT_TARGET_FLAGS): New. (rx_print_operand): Add %b for a reversed conditional. (rx_condexec_prescan): New. (TARGET_ASM_FINAL_POSTSCAN_INSN): Define. (rx_final_postscan): New. Index: gcc/config/rx/rx-protos.h =================================================================== --- gcc/config/rx/rx-protos.h (revision 173670) +++ gcc/config/rx/rx-protos.h (working copy) @@ -28,6 +28,7 @@ extern void rx_expand_prologue (void); extern int rx_initial_elimination_offset (int, int); +extern void rx_condexec_prescan (void); #ifdef RTX_CODE extern int rx_align_for_label (rtx, int); Index: gcc/config/rx/rx.md =================================================================== --- gcc/config/rx/rx.md (revision 173670) +++ gcc/config/rx/rx.md (working copy) @@ -135,6 +135,21 @@ ;; ---------------------------------------------------------------------------- +(define_attr "predicable" "no,yes" (const_string "yes")) + +(define_cond_exec + [(match_operator 0 "comparison_operator" + [(reg CC_REG) (const_int 0)])] + "TARGET_CONDEXEC" + "b%b0 1f !" 
+ ) + +(define_asm_attributes + [(set_attr "predicable" "no")] + ) + +;; ---------------------------------------------------------------------------- + ;; Comparisons ;; Note - we do not specify the two instructions necessary to perform @@ -364,6 +379,7 @@ (return)] "reload_completed" { + rx_condexec_prescan (); rx_emit_stack_popm (operands, false); return ""; } @@ -567,7 +583,9 @@ (match_operand:register_modes 1 "general_operand" "Int08,Sint16,Sint24,i,r,m,r,Int08,Sint16,Sint24,i"))] "" - { return rx_gen_move_template (operands, false); } + { + rx_condexec_prescan (); + return rx_gen_move_template (operands, false); } [(set_attr "length" "3,4,5,6,2,4,6,5,6,7,8") (set_attr "timings" "11,11,11,11,11,12,11,11,11,11,11")] ) @@ -577,7 +595,8 @@ (sign_extend:SI (match_operand:small_int_modes 1 "nonimmediate_operand" "r,m")))] "" - { return rx_gen_move_template (operands, false); } + { rx_condexec_prescan (); + return rx_gen_move_template (operands, false); } [(set_attr "length" "2,6") (set_attr "timings" "11,12")] ) @@ -587,7 +606,8 @@ (zero_extend:SI (match_operand:small_int_modes 1 "nonimmediate_operand" "r,m")))] "" - { return rx_gen_move_template (operands, true); } + { rx_condexec_prescan (); + return rx_gen_move_template (operands, true); } [(set_attr "length" "2,4") (set_attr "timings" "11,12")] ) @@ -610,6 +630,7 @@ (match_operand:SI 0 "const_int_operand" "n")))])] "reload_completed" { + rx_condexec_prescan (); rx_emit_stack_pushm (operands); return ""; } @@ -636,6 +657,7 @@ (match_operand:SI 0 "const_int_operand" "n")))])] "reload_completed" { + rx_condexec_prescan (); rx_emit_stack_popm (operands, true); return ""; } @@ -770,6 +792,7 @@ (match_dup 0)))] "reload_completed" { + rx_condexec_prescan (); if (GET_CODE (operands[2]) == EQ) return "stz\t%1, %0"; else @@ -1833,6 +1856,7 @@ (match_operand:SI 2 "const_int_operand" ""))] "" { + rx_condexec_prescan (); if (INTVAL (operands[2]) & 1) return "bset\t%1, %0"; else Index: gcc/config/rx/rx.c 
=================================================================== --- gcc/config/rx/rx.c (revision 173670) +++ gcc/config/rx/rx.c (working copy) @@ -385,6 +385,7 @@ %A Print an operand without a leading # character. %B Print an integer comparison name. + %b Print an integer comparison name, reversed. %C Print a control register name. %F Print a condition code flag name. %H Print high part of a DImode register, integer or address. @@ -499,7 +500,85 @@ fputs (ret, file); break; } + case 'b': + { + enum rtx_code code = GET_CODE (op); + enum machine_mode mode = GET_MODE (XEXP (op, 0)); + const char *ret; + if (mode == CC_Fmode) + { + /* C flag is undefined, and O flag carries unordered. None of the + branch combinations that include O use it helpfully. */ + switch (code) + { + case ORDERED: + ret = "o"; + break; + case UNORDERED: + ret = "no"; + break; + case LT: + ret = "pz"; + break; + case GE: + ret = "n"; + break; + case EQ: + ret = "ne"; + break; + case NE: + ret = "eq"; + break; + default: + gcc_unreachable (); + } + } + else + { + unsigned int flags = flags_from_mode (mode); + + switch (code) + { + case LT: + ret = (flags & CC_FLAG_O ? "ge" : "pz"); + break; + case GE: + ret = (flags & CC_FLAG_O ? "lt" : "n"); + break; + case GT: + ret = "le"; + break; + case LE: + ret = "gt"; + break; + case GEU: + ret = "ltu"; + break; + case LTU: + ret = "geu"; + break; + case GTU: + ret = "leu"; + break; + case LEU: + ret = "gtu"; + break; + case EQ: + ret = "ne"; + break; + case NE: + ret = "eq"; + break; + default: + gcc_unreachable (); + } + gcc_checking_assert ((flags_from_code (code) & ~flags) == 0); + } + fputs (ret, file); + break; + } + case 'C': gcc_assert (CONST_INT_P (op)); switch (INTVAL (op)) @@ -2317,6 +2396,9 @@ return true; } +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS MASK_CONDEXEC + /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. 
*/ static void @@ -2788,6 +2870,27 @@ return 0; } + +void +rx_condexec_prescan (void) +{ + if (current_insn_predicate) + { + fprintf (asm_out_file, "b"); + rx_print_operand (asm_out_file, current_insn_predicate, 'b'); + fprintf (asm_out_file, " 1f ! "); + } +} + +#undef TARGET_ASM_FINAL_POSTSCAN_INSN +#define TARGET_ASM_FINAL_POSTSCAN_INSN rx_final_postscan +static void +rx_final_postscan (FILE *file, rtx insn, rtx *operands, int noperands) +{ + if (current_insn_predicate) + fprintf (file, "1:\n"); +} + #undef TARGET_ASM_JUMP_ALIGN_MAX_SKIP #define TARGET_ASM_JUMP_ALIGN_MAX_SKIP rx_max_skip_for_label #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP Index: gcc/config/rx/rx.opt =================================================================== --- gcc/config/rx/rx.opt (revision 173670) +++ gcc/config/rx/rx.opt (working copy) @@ -97,3 +97,9 @@ msave-acc-in-interrupts Target Mask(SAVE_ACC_REGISTER) Specifies whether interrupt functions should save and restore the accumulator register. + +;--------------------------------------------------- + +mcondexec +Target Mask(CONDEXEC) +Specifies whether simulated conditional execution should be enabled.