> -----Original Message----- > From: Richard Sandiford [mailto:rdsandif...@googlemail.com] > Sent: Thursday, August 02, 2012 5:33 PM > To: Moore, Catherine > Cc: gcc-patches@gcc.gnu.org > Subject: Re: [committed] PR 51931: force non-MIPS16ness for long-branch tests > (NOW RFA: MIPS16 Long Branch Patch) > > Sorry for the late reply. Been thinking it over a bit more. > > "Moore, Catherine" <catherine_mo...@mentor.com> writes: > > Now that we are in the window for 4.8, I'd like to discuss the > > possibility of applying this patch. Have you had a chance to think > > about it? > > In the end, I think we should go for the fix originally suggested in the PR: a > sequence that saves a scratch register to $1 and restores it in the delay > slot. I > checked with the MIPS UK guys that there aren't any known gotchas with doing > that, which shows how long this has been in the making.
Okay. > > The quality of the scratch sequence is clearly vastly inferior to your > version in the > "common" case, but the point is that that case isn't really all that common, > and > this way should at least be fully general. > The problem with inserting branch trampolines separately from laying out the > constant pool is that one could interfere with the other. There are also > corner > cases where we need more trampolines than we have room. > > There's still the possiblity of using trampolines as an optimisation in cases > where > we know its safe, but I'm not sure it's worth the effort. I agree that it's not worth the effort. > > Tested on mips64-linux-gnu, both with -mips16 and -mips16/-fpic. > I did a run with the patch as below, and a run with the extended branches > commented out (so that branches were either unextended or long). Applied. > Thanks for the patch. Catherine > > gcc/ > PR target/51931 > * config/mips/mips-protos.h (mips_strip_unspec_address): Declare. > * config/mips/mips.c (mips_strip_unspec_address): Make extern. > (mips16_rewrite_pool_constant): Make a copy of the pool constant > before adding to a PC-relative table. > (mips16_lay_out_constants): Add a SPLIT_P parameter. > (mips16_load_branch_target, mips16_split_long_branches): New > functions. > (mips_reorg): Update call to mips16_lay_out_constants. > Call mips16_split_long_branches. > * config/mips/predicates.md (pc_or_label_operand): Delete. > * config/mips/mips.md (length): Add a calculation for MIPS16 branches. > Move the extended_mips16 handling further down. > (*branch_equality<mode>_mips16): Replace use pc_or_label_operand > with explicit label_ref and pc. Follow the usual operand numbering. > (*branch_equality<mode>_mips16_inverted): New pattern. > (*jump_mips16): Add length attribute. > (indirect_jump_and_restore_<mode>): New pattern. > (consttable_int): Call mips_strip_unspec_address on the operand. > > gcc/testsuite/ > PR target/51931 > * gcc.c-torture/compile/20001226-1.c: Remove nomips16 attribute. > * g++.dg/opt/longbranch1.C: Likewise. > > Index: gcc/config/mips/mips-protos.h > ================================================================= > == > --- gcc/config/mips/mips-protos.h 2012-08-02 21:19:07.323527035 +0100 > +++ gcc/config/mips/mips-protos.h 2012-08-02 21:51:23.509522298 +0100 > @@ -190,6 +190,7 @@ extern rtx mips_pic_base_register (rtx); extern rtx > mips_got_load (rtx, rtx, enum mips_symbol_type); extern bool > mips_split_symbol (rtx, rtx, enum machine_mode, rtx *); extern rtx > mips_unspec_address (rtx, enum mips_symbol_type); > +extern rtx mips_strip_unspec_address (rtx); > extern void mips_move_integer (rtx, rtx, unsigned HOST_WIDE_INT); extern > bool mips_legitimize_move (enum machine_mode, rtx, rtx); > > Index: gcc/config/mips/mips.c > ================================================================= > == > --- gcc/config/mips/mips.c 2012-08-02 21:19:07.322527035 +0100 > +++ gcc/config/mips/mips.c 2012-08-02 21:51:23.503522297 +0100 > @@ -2563,7 +2563,7 @@ mips_unspec_address (rtx address, enum m > /* If OP is an UNSPEC address, return the address to which it refers, > otherwise return OP itself. */ > > -static rtx > +rtx > mips_strip_unspec_address (rtx op) > { > rtx base, offset; > @@ -14070,7 +14070,7 @@ mips16_rewrite_pool_constant (struct mip > split_const (*x, &base, &offset); > if (GET_CODE (base) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P > (base)) > { > - label = mips16_add_constant (pool, get_pool_constant (base), > + label = mips16_add_constant (pool, copy_rtx (get_pool_constant > + (base)), > get_pool_mode (base)); > base = gen_rtx_LABEL_REF (Pmode, label); > *x = mips_unspec_address_offset (base, offset, SYMBOL_PC_RELATIVE); > @@ -14126,10 +14126,11 @@ mips_cfg_in_reorg (void) > || TARGET_RELAX_PIC_CALLS); > } > > -/* Build MIPS16 constant pools. */ > +/* Build MIPS16 constant pools. Split the instructions if SPLIT_P, > + otherwise assume that they are already split. */ > > static void > -mips16_lay_out_constants (void) > +mips16_lay_out_constants (bool split_p) > { > struct mips16_constant_pool pool; > struct mips16_rewrite_pool_refs_info info; @@ -14138,10 +14139,13 @@ > mips16_lay_out_constants (void) > if (!TARGET_MIPS16_PCREL_LOADS) > return; > > - if (mips_cfg_in_reorg ()) > - split_all_insns (); > - else > - split_all_insns_noflow (); > + if (split_p) > + { > + if (mips_cfg_in_reorg ()) > + split_all_insns (); > + else > + split_all_insns_noflow (); > + } > barrier = 0; > memset (&pool, 0, sizeof (pool)); > for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) @@ -15490,6 > +15494,110 @@ mips_df_reorg (void) > df_finish_pass (false); > } > > +/* Emit code to load LABEL_REF SRC into MIPS16 register DEST. This is > + called very late in mips_reorg, but the caller is required to run > + mips16_lay_out_constants on the result. */ > + > +static void > +mips16_load_branch_target (rtx dest, rtx src) { > + if (TARGET_ABICALLS && !TARGET_ABSOLUTE_ABICALLS) > + { > + rtx page, low; > + > + if (mips_cfun_has_cprestore_slot_p ()) > + mips_emit_move (dest, mips_cprestore_slot (dest, true)); > + else > + mips_emit_move (dest, pic_offset_table_rtx); > + page = mips_unspec_address (src, SYMBOL_GOTOFF_PAGE); > + low = mips_unspec_address (src, SYMBOL_GOT_PAGE_OFST); > + emit_insn (gen_rtx_SET (VOIDmode, dest, > + PMODE_INSN (gen_unspec_got, (dest, page)))); > + emit_insn (gen_rtx_SET (VOIDmode, dest, > + gen_rtx_LO_SUM (Pmode, dest, low))); > + } > + else > + { > + src = mips_unspec_address (src, SYMBOL_ABSOLUTE); > + mips_emit_move (dest, src); > + } > +} > + > +/* If we're compiling a MIPS16 function, look for and split any long > branches. > + This must be called after all other instruction modifications in > + mips_reorg. */ > + > +static void > +mips16_split_long_branches (void) > +{ > + bool something_changed; > + > + if (!TARGET_MIPS16) > + return; > + > + /* Loop until the alignments for all targets are sufficient. */ do > + { > + rtx insn; > + > + shorten_branches (get_insns ()); > + something_changed = false; > + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) > + if (JUMP_P (insn) > + && USEFUL_INSN_P (insn) > + && get_attr_length (insn) > 8) > + { > + rtx old_label, new_label, temp, saved_temp; > + rtx target, jump, jump_sequence; > + > + start_sequence (); > + > + /* Free up a MIPS16 register by saving it in $1. */ > + saved_temp = gen_rtx_REG (Pmode, AT_REGNUM); > + temp = gen_rtx_REG (Pmode, GP_REG_FIRST + 2); > + emit_move_insn (saved_temp, temp); > + > + /* Load the branch target into TEMP. */ > + old_label = JUMP_LABEL (insn); > + target = gen_rtx_LABEL_REF (Pmode, old_label); > + mips16_load_branch_target (temp, target); > + > + /* Jump to the target and restore the register's > + original value. */ > + jump = emit_jump_insn (PMODE_INSN > (gen_indirect_jump_and_restore, > + (temp, temp, saved_temp))); > + JUMP_LABEL (jump) = old_label; > + LABEL_NUSES (old_label)++; > + > + /* Rewrite any symbolic references that are supposed to use > + a PC-relative constant pool. */ > + mips16_lay_out_constants (false); > + > + if (simplejump_p (insn)) > + /* We're going to replace INSN with a longer form. */ > + new_label = NULL_RTX; > + else > + { > + /* Create a branch-around label for the original > + instruction. */ > + new_label = gen_label_rtx (); > + emit_label (new_label); > + } > + > + jump_sequence = get_insns (); > + end_sequence (); > + > + emit_insn_after (jump_sequence, insn); > + if (new_label) > + invert_jump (insn, new_label, false); > + else > + delete_insn (insn); > + something_changed = true; > + } > + } > + while (something_changed); > +} > + > /* Implement TARGET_MACHINE_DEPENDENT_REORG. */ > > static void > @@ -15500,7 +15608,7 @@ mips_reorg (void) > to date if the CFG is available. */ > if (mips_cfg_in_reorg ()) > compute_bb_for_insn (); > - mips16_lay_out_constants (); > + mips16_lay_out_constants (true); > if (mips_cfg_in_reorg ()) > { > mips_df_reorg (); > @@ -15519,6 +15627,7 @@ mips_reorg (void) > /* The expansion could invalidate some of the VR4130 alignment > optimizations, but this should be an extremely rare case anyhow. */ > mips_reorg_process_insns (); > + mips16_split_long_branches (); > } > > /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than > asm text @@ -15639,7 +15748,7 @@ mips_output_mi_thunk (FILE *file, tree t > insn = get_insns (); > insn_locators_alloc (); > split_all_insns_noflow (); > - mips16_lay_out_constants (); > + mips16_lay_out_constants (true); > shorten_branches (insn); > final_start_function (insn, file, 1); > final (insn, file, 1); > Index: gcc/config/mips/predicates.md > ================================================================= > == > --- gcc/config/mips/predicates.md 2012-08-02 21:19:07.322527035 +0100 > +++ gcc/config/mips/predicates.md 2012-08-02 21:51:23.510522298 +0100 > @@ -139,9 +139,6 @@ (define_predicate "muldiv_target_operand > (match_operand 0 "hilo_operand") > (match_operand 0 "register_operand"))) > > -(define_special_predicate "pc_or_label_operand" > - (match_code "pc,label_ref")) > - > (define_predicate "const_call_insn_operand" > (match_code "const,symbol_ref,label_ref") { > Index: gcc/config/mips/mips.md > ================================================================= > == > --- gcc/config/mips/mips.md 2012-08-02 21:19:07.322527035 +0100 > +++ gcc/config/mips/mips.md 2012-08-02 22:01:23.067520831 +0100 > @@ -402,11 +402,7 @@ (define_attr "extended_mips16" "no,yes" > > ;; Length of instruction in bytes. > (define_attr "length" "" > - (cond [(and (eq_attr "extended_mips16" "yes") > - (match_test "TARGET_MIPS16")) > - (const_int 8) > - > - ;; Direct branch instructions have a range of [-0x20000,0x1fffc], > + (cond [;; Direct branch instructions have a range of > + [-0x20000,0x1fffc], > ;; relative to the address of the delay slot. If a branch is > ;; outside this range, we have a choice of two sequences. > ;; For PIC, an out-of-range branch like: > @@ -431,14 +427,21 @@ (define_attr "length" "" > ;; using la/jr in this case too, but we do not do so at > ;; present. > ;; > - ;; Note that this value does not account for the delay slot > + ;; The value we specify here does not account for the delay slot > ;; instruction, whose length is added separately. If the RTL > ;; pattern has no explicit delay slot, mips_adjust_insn_length > - ;; will add the length of the implicit nop. The values for > - ;; forward and backward branches will be different as well. > - (eq_attr "type" "branch") > + ;; will add the length of the implicit nop. The range of > + ;; [-0x20000, 0x1fffc] from the address of the delay slot > + ;; therefore translates to a range of: > + ;; > + ;; [-(0x20000 - sizeof (branch)), 0x1fffc - sizeof (slot)] > + ;; == [-0x1fffc, 0x1fff8] > + ;; > + ;; from the shorten_branches reference address. > + (and (eq_attr "type" "branch") > + (not (match_test "TARGET_MIPS16"))) > (cond [(and (le (minus (match_dup 0) (pc)) (const_int 131064)) > - (le (minus (pc) (match_dup 0)) (const_int 131068))) > + (le (minus (pc) (match_dup 0)) (const_int 131068))) > (const_int 4) > > ;; The non-PIC case: branch, first delay slot, and J. > @@ -453,6 +456,100 @@ (define_attr "length" "" > ;; of an insn. > (const_int MAX_PIC_BRANCH_LENGTH)) > > + ;; An unextended MIPS16 branch has a range of [-0x100, 0xfe] > + ;; from the address of the following instruction, which leads > + ;; to a range of: > + ;; > + ;; [-(0x100 - sizeof (branch)), 0xfe] > + ;; == [-0xfe, 0xfe] > + ;; > + ;; from the shorten_branches reference address. Extended branches > + ;; likewise have a range of [-0x10000, 0xfffe] from the address > + ;; of the following instruction, which leads to a range of: > + ;; > + ;; [-(0x10000 - sizeof (branch)), 0xfffe] > + ;; == [-0xfffc, 0xfffe] > + ;; > + ;; from the reference address. > + ;; > + ;; When a branch is out of range, mips_reorg splits it into a form > + ;; that uses in-range branches. There are four basic sequences: > + ;; > + ;; (1) Absolute addressing with a readable text segment > + ;; (32-bit addresses): > + ;; > + ;; b... foo 2 bytes > + ;; move $1,$2 2 bytes > + ;; lw $2,label 2 bytes > + ;; jr $2 2 bytes > + ;; move $2,$1 2 bytes > + ;; .align 2 0 or 2 bytes > + ;; label: > + ;; .word target 4 bytes > + ;; foo: > + ;; (16 bytes in the worst case) > + ;; > + ;; (2) Absolute addressing with a readable text segment > + ;; (64-bit addresses): > + ;; > + ;; b... foo 2 bytes > + ;; move $1,$2 2 bytes > + ;; ld $2,label 2 bytes > + ;; jr $2 2 bytes > + ;; move $2,$1 2 bytes > + ;; .align 3 0 to 6 bytes > + ;; label: > + ;; .dword target 8 bytes > + ;; foo: > + ;; (24 bytes in the worst case) > + ;; > + ;; (3) Absolute addressing without a readable text segment > + ;; (which requires 32-bit addresses at present): > + ;; > + ;; b... foo 2 bytes > + ;; move $1,$2 2 bytes > + ;; lui $2,%hi(target) 4 bytes > + ;; sll $2,8 2 bytes > + ;; sll $2,8 2 bytes > + ;; addiu $2,%lo(target) 4 bytes > + ;; jr $2 2 bytes > + ;; move $2,$1 2 bytes > + ;; foo: > + ;; (20 bytes) > + ;; > + ;; (4) PIC addressing (which requires 32-bit addresses at present): > + ;; > + ;; b... foo 2 bytes > + ;; move $1,$2 2 bytes > + ;; lw $2,cprestore 0, 2 or 4 bytes > + ;; lw $2,%got(target)($2) 4 bytes > + ;; addiu $2,%lo(target) 4 bytes > + ;; jr $2 2 bytes > + ;; move $2,$1 2 bytes > + ;; foo: > + ;; (20 bytes in the worst case) > + ;; > + ;; Note that the conditions test adjusted lengths, whereas the > + ;; result is an unadjusted length, and is thus twice the true value. > + (and (eq_attr "type" "branch") > + (match_test "TARGET_MIPS16")) > + (cond [(and (le (minus (match_dup 0) (pc)) (const_int 254)) > + (le (minus (pc) (match_dup 0)) (const_int 254))) > + (const_int 4) > + (and (le (minus (match_dup 0) (pc)) (const_int 65534)) > + (le (minus (pc) (match_dup 0)) (const_int 65532))) > + (const_int 8) > + (and (match_test "TARGET_ABICALLS") > + (not (match_test "TARGET_ABSOLUTE_ABICALLS"))) > + (const_int 40) > + (match_test "Pmode == SImode") > + (const_int 32) > + ] (const_int 48)) > + > + (and (eq_attr "extended_mips16" "yes") > + (match_test "TARGET_MIPS16")) > + (const_int 8) > + > ;; "Ghost" instructions occupy no space. > (eq_attr "type" "ghost") > (const_int 0) > @@ -5400,28 +5497,29 @@ (define_insn "*branch_equality<mode>_inv > (define_insn "*branch_equality<mode>_mips16" > [(set (pc) > (if_then_else > - (match_operator 0 "equality_operator" > - [(match_operand:GPR 1 "register_operand" "d,t") > + (match_operator 1 "equality_operator" > + [(match_operand:GPR 2 "register_operand" "d,t") > (const_int 0)]) > - (match_operand 2 "pc_or_label_operand" "") > - (match_operand 3 "pc_or_label_operand" "")))] > + (label_ref (match_operand 0 "" "")) > + (pc)))] > "TARGET_MIPS16" > -{ > - if (operands[2] != pc_rtx) > - { > - if (which_alternative == 0) > - return "b%C0z\t%1,%2"; > - else > - return "bt%C0z\t%2"; > - } > - else > - { > - if (which_alternative == 0) > - return "b%N0z\t%1,%3"; > - else > - return "bt%N0z\t%3"; > - } > -} > + "@ > + b%C1z\t%2,%0 > + bt%C1z\t%0" > + [(set_attr "type" "branch")]) > + > +(define_insn "*branch_equality<mode>_mips16_inverted" > + [(set (pc) > + (if_then_else > + (match_operator 1 "equality_operator" > + [(match_operand:GPR 2 "register_operand" "d,t") > + (const_int 0)]) > + (pc) > + (label_ref (match_operand 0 "" ""))))] > + "TARGET_MIPS16" > + "@ > + b%N1z\t%2,%0 > + bt%N1z\t%0" > [(set_attr "type" "branch")]) > > (define_expand "cbranch<mode>4" > @@ -5717,7 +5815,30 @@ (define_insn "*jump_mips16" > (label_ref (match_operand 0 "" "")))] > "TARGET_MIPS16" > "b\t%l0" > - [(set_attr "type" "branch")]) > + [(set_attr "type" "branch") > + (set (attr "length") > + ;; This calculation is like the normal branch one, but the > + ;; range of the unextended instruction is [-0x800, 0x7fe] rather > + ;; than [-0x100, 0xfe]. This translates to a range of: > + ;; > + ;; [-(0x800 - sizeof (branch)), 0x7fe] > + ;; == [-0x7fe, 0x7fe] > + ;; > + ;; from the shorten_branches reference address. Long-branch > + ;; sequences will replace this one, so the minimum length > + ;; is one instruction shorter than for conditional branches. > + (cond [(and (le (minus (match_dup 0) (pc)) (const_int 2046)) > + (le (minus (pc) (match_dup 0)) (const_int 2046))) > + (const_int 4) > + (and (le (minus (match_dup 0) (pc)) (const_int 65534)) > + (le (minus (pc) (match_dup 0)) (const_int 65532))) > + (const_int 8) > + (and (match_test "TARGET_ABICALLS") > + (not (match_test "TARGET_ABSOLUTE_ABICALLS"))) > + (const_int 36) > + (match_test "Pmode == SImode") > + (const_int 28) > + ] (const_int 44)))]) > > (define_expand "indirect_jump" > [(set (pc) (match_operand 0 "register_operand"))] @@ -5735,6 +5856,18 @@ > (define_insn "indirect_jump_<mode>" > [(set_attr "type" "jump") > (set_attr "mode" "none")]) > > +;; A combined jump-and-move instruction, used for MIPS16 long-branch ;; > +sequences. Having a dedicated pattern is more convenient than ;; > +creating a SEQUENCE for this special case. > +(define_insn "indirect_jump_and_restore_<mode>" > + [(set (pc) (match_operand:P 1 "register_operand" "d")) > + (set (match_operand:P 0 "register_operand" "=d") > + (match_operand:P 2 "register_operand" "y"))] > + "" > + "%(%<jr\t%1\;move\t%0,%2%>%)" > + [(set_attr "type" "multi") > + (set_attr "extended_mips16" "yes")]) > + > (define_expand "tablejump" > [(set (pc) > (match_operand 0 "register_operand")) > @@ -6549,7 +6682,8 @@ (define_insn "consttable_int" > UNSPEC_CONSTTABLE_INT)] > "TARGET_MIPS16" > { > - assemble_integer (operands[0], INTVAL (operands[1]), > + assemble_integer (mips_strip_unspec_address (operands[0]), > + INTVAL (operands[1]), > BITS_PER_UNIT * INTVAL (operands[1]), 1); > return ""; > } > Index: gcc/testsuite/gcc.c-torture/compile/20001226-1.c > ================================================================= > == > --- gcc/testsuite/gcc.c-torture/compile/20001226-1.c 2012-08-02 > 21:19:07.321527035 +0100 > +++ gcc/testsuite/gcc.c-torture/compile/20001226-1.c 2012-08-02 > 21:51:23.511522298 +0100 > @@ -21,10 +21,6 @@ #define C256(x,y) C64(x,y) C64(x,y+4) C6 #define > C1024(x,y) C256(x,y) C256(x+16,y) C256(x+32,y) C256(x+48,y) #define > C4096(x,y) C1024(x,y) C1024(x,y+16) C1024(x,y+32) C1024(x,y+48) > > -#ifdef __mips > -/* See PR 51931. */ > -__attribute__((nomips16)) > -#endif > unsigned foo(int x[64], int y[64]) > { > C4096(x,y); > Index: gcc/testsuite/g++.dg/opt/longbranch1.C > ================================================================= > == > --- gcc/testsuite/g++.dg/opt/longbranch1.C 2012-08-02 > 21:19:07.321527035 +0100 > +++ gcc/testsuite/g++.dg/opt/longbranch1.C 2012-08-02 > 21:51:23.511522298 +0100 > @@ -26,10 +26,6 @@ #define verymuchcode \ > muchcode; muchcode; muchcode; muchcode; muchcode; muchcode; \ > muchcode; muchcode; muchcode; muchcode; muchcode; muchcode > > -#ifdef __mips > -/* See PR 51931. */ > -__attribute__((nomips16)) > -#endif > int > main (int argc, char **argv) > {