This patch fixes an issue that limit opportunities for shrink-wrapping on PowerPC. The rs6000 REG_ALLOC_ORDER chooses r0 as the very first gpr to use in code, with r11 also having high priority. This means it is quite likely that r0 or r11 is live on the edge chosen for shrink-wrapping. That's unfortunate since rs6000 uses r0, r11, and r12 as temporaries in the prologue, and thus can't insert the prologue on that edge.
I avoid this situation in most cases by simply changing the register allocation order, making r0 and r11 the last of the volatile gprs to be used. I also noticed that r12 is not used until after the non-volatile regs are allocated. According to the comment, the reason this is done is to avoid putting a DI into the r12,r13 pair (and as r13 is non-volatile would mean saving and restoring all the non-volatile gprs when not inlining the saves/restores). However, when r13 is a fixed reg this cannot occur, so moving r12 before the non-volatile regs makes another reg available before we need to start saving regs. After making the register allocation order changes I found gcc bootstrap failed. This turned out to be caused by shrink-wrapping and register renaming moving some pre-prologue code after the epilogue. To support this we need to ensure no prologue unwind info is left active after the epilogue. Bootstrapped and regression tested powerpc-linux. Two regressions appeared due to a problem in the shrink-wrap code. More on that later. * config/rs6000/rs6000.c (rs6000_emit_epilogue): Emit cfa_restores when flag_shrink_wrap. * gcc/config/rs6000/rs6000.h (EARLY_R12, LATE_R12): Define. (REG_ALLOC_ORDER): Move r12 before call-saved regs when FIXED_R13. Move r11 and r0 later to suit shrink-wrapping. diff -urp -x'*~' -x'*.orig' -x'*.rej' -x.svn gcc-alanshrink1/gcc/config/rs6000/rs6000.c gcc-current/gcc/config/rs6000/rs6000.c --- gcc-alanshrink1/gcc/config/rs6000/rs6000.c 2011-09-26 15:51:19.000000000 +0930 +++ gcc-current/gcc/config/rs6000/rs6000.c 2011-09-23 11:54:00.000000000 +0930 @@ -21001,7 +21045,7 @@ rs6000_emit_epilogue (int sibcall) reg = gen_rtx_REG (V4SImode, i); emit_move_insn (reg, mem); - if (DEFAULT_ABI == ABI_V4) + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); } @@ -21052,10 +21096,16 @@ rs6000_emit_epilogue (int sibcall) } /* Set LR here to try to overlap restores below. LR is always saved - above incoming stack, so it never needs REG_CFA_RESTORE. */ + above incoming stack, so it never needs REG_CFA_RESTORE except + when shrink-wrapping as in that case we may have blocks from + before the prologue being moved after the epilogue. */ if (restore_lr && restoring_GPRs_inline) - emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), - gen_rtx_REG (Pmode, 0)); + { + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + emit_move_insn (lr, gen_rtx_REG (Pmode, 0)); + if (flag_shrink_wrap) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, lr, cfa_restores); + } /* Load exception handler data registers, if needed. */ if (crtl->calls_eh_return) @@ -21146,7 +21196,7 @@ rs6000_emit_epilogue (int sibcall) reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); insn = emit_move_insn (reg, mem); - if (DEFAULT_ABI == ABI_V4) + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) { if (frame_pointer_needed && info->first_gp_reg_save + i @@ -21188,7 +21238,7 @@ rs6000_emit_epilogue (int sibcall) if (info->cr_save_p) { rs6000_restore_saved_cr (cr_save_reg, using_mtcr_multiple); - if (DEFAULT_ABI == ABI_V4) + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (SImode, CR2_REGNO), @@ -21217,7 +21267,7 @@ rs6000_emit_epilogue (int sibcall) return; } - if (DEFAULT_ABI == ABI_V4) + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) { if (frame_pointer_needed) { @@ -21246,7 +21296,7 @@ rs6000_emit_epilogue (int sibcall) rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, reg, mem); - if (DEFAULT_ABI == ABI_V4) + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); } @@ -21271,7 +21321,7 @@ rs6000_emit_epilogue (int sibcall) rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); insn = emit_move_insn (reg, mem); - if (DEFAULT_ABI == ABI_V4) + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) { if (frame_pointer_needed && info->first_gp_reg_save + i @@ -21292,10 +21342,12 @@ rs6000_emit_epilogue (int sibcall) { rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, info->lr_save_offset + sp_offset); + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); emit_move_insn (gen_rtx_REG (Pmode, 0), mem); - emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), - gen_rtx_REG (Pmode, 0)); + emit_move_insn (lr, gen_rtx_REG (Pmode, 0)); + if (flag_shrink_wrap) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, lr, cfa_restores); } /* Restore fpr's if we need to do it without calling a function. */ @@ -21316,7 +21368,7 @@ rs6000_emit_epilogue (int sibcall) info->first_fp_reg_save + i); emit_move_insn (reg, mem); - if (DEFAULT_ABI == ABI_V4) + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); } @@ -21325,7 +21377,7 @@ rs6000_emit_epilogue (int sibcall) if (info->cr_save_p) { rs6000_restore_saved_cr (cr_save_reg, using_mtcr_multiple); - if (DEFAULT_ABI == ABI_V4) + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) cfa_restores = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (SImode, CR2_REGNO), cfa_restores); diff -urp -x'*~' -x'*.orig' -x'*.rej' -x.svn gcc-alanshrink1/gcc/config/rs6000/rs6000.h gcc-current/gcc/config/rs6000/rs6000.h --- gcc-alanshrink1/gcc/config/rs6000/rs6000.h 2011-09-26 15:29:15.000000000 +0930 +++ gcc-current/gcc/config/rs6000/rs6000.h 2011-09-22 02:30:15.000000000 +0930 @@ -894,10 +894,11 @@ extern unsigned rs6000_pointer_size; cr1 (not saved, but used for FP operations) cr0 (not saved, but used for arithmetic operations) cr4, cr3, cr2 (saved) - r0 (not saved; cannot be base reg) r9 (not saved; best for TImode) - r11, r10, r8-r4 (not saved; highest used first to make less conflict) + r10, r8-r4 (not saved; highest first for less conflict with params) r3 (not saved; return value register) + r11 (not saved; later alloc to help shrink-wrap) + r0 (not saved; cannot be base reg) r31 - r13 (saved; order given to save least number) r12 (not saved; if used for DImode or DFmode would use r13) mq (not saved; best to use it if we can) @@ -922,6 +923,14 @@ extern unsigned rs6000_pointer_size; #define MAYBE_R2_FIXED #endif +#if FIXED_R13 == 1 +#define EARLY_R12 12, +#define LATE_R12 +#else +#define EARLY_R12 +#define LATE_R12 12, +#endif + #define REG_ALLOC_ORDER \ {32, \ 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \ @@ -929,11 +938,11 @@ extern unsigned rs6000_pointer_size; 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ 50, 49, 48, 47, 46, \ 75, 74, 69, 68, 72, 71, 70, \ - 0, MAYBE_R2_AVAILABLE \ - 9, 11, 10, 8, 7, 6, 5, 4, \ - 3, \ + MAYBE_R2_AVAILABLE \ + 9, 10, 8, 7, 6, 5, 4, \ + 3, EARLY_R12 11, 0, \ 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, \ - 18, 17, 16, 15, 14, 13, 12, \ + 18, 17, 16, 15, 14, 13, LATE_R12 \ 64, 66, 65, \ 73, 1, MAYBE_R2_FIXED 67, 76, \ /* AltiVec registers. */ \ -- Alan Modra Australia Development Lab, IBM