Re: [RS6000] Fix PR53038, cfa_restore out of order
On Thu, Apr 26, 2012 at 1:00 AM, Alan Modra wrote: > PR target/53038 > * config/rs6000/rs6000.c (load_lr_save, restore_saved_lr, > load_cr_save, add_crlr_cfa_restore): New functions. > (rs6000_restore_saved_cr): Rename to.. > (restore_saved_cr): ..this. Add cfa_restore notes for cr. > (rs6000_emit_epilogue): Use new functions. Adjust condition > for emitting lr and cr cfa_restore. Emit cfa_restores for fp > regs when using out-of-line restore only when shrink wrapping. Okay. Thanks, David
Re: [RS6000] Fix PR53038, cfa_restore out of order
On Wed, Apr 25, 2012 at 02:00:48PM -0700, Richard Henderson wrote: > On 04/20/12 18:58, Alan Modra wrote: > > + add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); > > + RTX_FRAME_RELATED_P (insn) = 1; > > You're probably better off using REG_CFA_REGISTER here, > rather than making dwarf2cfi.c guess what you meant. > The guessing is rather tuned to prologues... It worked! I take your point though, and thanks for reviewing the patch! The new version is just s/FRAME_RELATED_EXPR/CFA_REGISTER/ but I'll post the entire patch again since my other prologue/epilogue patches mean the original pr53038 fix no longer applies cleanly. Bootstrapped and regression tested powerpc-linux. PR target/53038 * config/rs6000/rs6000.c (load_lr_save, restore_saved_lr, load_cr_save, add_crlr_cfa_restore): New functions. (rs6000_restore_saved_cr): Rename to.. (restore_saved_cr): ..this. Add cfa_restore notes for cr. (rs6000_emit_epilogue): Use new functions. Adjust condition for emitting lr and cr cfa_restore. Emit cfa_restores for fp regs when using out-of-line restore only when shrink wrapping. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 186854) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -952,7 +952,6 @@ static bool rs6000_reg_live_or_pic_offset_p (int); static tree rs6000_builtin_vectorized_libmass (tree, tree, tree); static tree rs6000_builtin_vectorized_function (tree, tree, tree); -static void rs6000_restore_saved_cr (rtx, int); static bool rs6000_output_addr_const_extra (FILE *, rtx); static void rs6000_output_function_prologue (FILE *, HOST_WIDE_INT); static void rs6000_output_function_epilogue (FILE *, HOST_WIDE_INT); @@ -20276,10 +20275,37 @@ we restore after the pop when possible. */ #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0 +/* Restoring cr is a two step process: loading a reg from the frame + save, then moving the reg to cr. For ABI_V4 we must let the + unwinder know that the stack location is no longer valid at or + before the stack deallocation, but we can't emit a cfa_restore for + cr at the stack deallocation like we do for other registers. + The trouble is that it is possible for the move to cr to be + scheduled after the stack deallocation. So say exactly where cr + is located on each of the two insns. */ + +static rtx +load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func) +{ + rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset); + rtx reg = gen_rtx_REG (SImode, regno); + rtx insn = emit_move_insn (reg, mem); + + if (!exit_func && DEFAULT_ABI == ABI_V4) +{ + rtx cr = gen_rtx_REG (SImode, CR2_REGNO); + rtx set = gen_rtx_SET (VOIDmode, reg, cr); + + add_reg_note (insn, REG_CFA_REGISTER, set); + RTX_FRAME_RELATED_P (insn) = 1; +} + return reg; +} + /* Reload CR from REG. */ static void -rs6000_restore_saved_cr (rtx reg, int using_mfcr_multiple) +restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func) { int count = 0; int i; @@ -20317,13 +20343,61 @@ else for (i = 0; i < 8; i++) if (df_regs_ever_live_p (CR0_REGNO+i) && ! call_used_regs[CR0_REGNO+i]) - { - emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, - CR0_REGNO+i), - reg)); - } + emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO+i), + reg)); + + if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) +{ + rtx insn = get_last_insn (); + rtx cr = gen_rtx_REG (SImode, CR2_REGNO); + + add_reg_note (insn, REG_CFA_RESTORE, cr); + RTX_FRAME_RELATED_P (insn) = 1; +} } +/* Like cr, the move to lr instruction can be scheduled after the + stack deallocation, but unlike cr, its stack frame save is still + valid. So we only need to emit the cfa_restore on the correct + instruction. */ + +static void +load_lr_save (int regno, rtx frame_reg_rtx, int offset) +{ + rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset); + rtx reg = gen_rtx_REG (Pmode, regno); + + emit_move_insn (reg, mem); +} + +static void +restore_saved_lr (int regno, bool exit_func) +{ + rtx reg = gen_rtx_REG (Pmode, regno); + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + rtx insn = emit_move_insn (lr, reg); + + if (!exit_func && flag_shrink_wrap) +{ + add_reg_note (insn, REG_CFA_RESTORE, lr); + RTX_FRAME_RELATED_P (insn) = 1; +} +} + +static rtx +add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores) +{ + if (info->cr_save_p) +cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR2_REGNO), + cfa_restores); + if (info->lr_save_p) +cfa_restores
Re: [RS6000] Fix PR53038, cfa_restore out of order
On 04/20/12 18:58, Alan Modra wrote: > + add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); > + RTX_FRAME_RELATED_P (insn) = 1; You're probably better off using REG_CFA_REGISTER here, rather than making dwarf2cfi.c guess what you meant. The guessing is rather tuned to prologues... r~
[RS6000] Fix PR53038, cfa_restore out of order
For abiv4, mtcr/mtcrf emitted in the epilogue can be scheduled past the stack deallocation, where the cfa_restore notes are emitted. The stack tie doesn't stop movement of this insn because it doesn't touch memory. So emit the cfa_restore on the mtcr/mtcrf itself. The PR was about cr, but exactly the same thing can happen with lr too. Bootstrapped and regression tested powerpc-linux. OK to apply? PR target/53038 * config/rs6000/rs6000.c (load_lr_save, restore_saved_lr, load_cr_save, add_crlr_cfa_restore): New functions. (rs6000_restore_saved_cr): Rename to.. (restore_saved_cr): ..this. Add cfa_restore notes for cr. (rs6000_emit_epilogue): Use new functions. Adjust condition for emitting lr and cr cfa_restore. Emit cfa_restores for fp regs when using out-of-line restore only when shrink wrapping. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 186621) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -955,7 +955,6 @@ static bool rs6000_reg_live_or_pic_offset_p (int); static tree rs6000_builtin_vectorized_libmass (tree, tree, tree); static tree rs6000_builtin_vectorized_function (tree, tree, tree); -static void rs6000_restore_saved_cr (rtx, int); static bool rs6000_output_addr_const_extra (FILE *, rtx); static void rs6000_output_function_prologue (FILE *, HOST_WIDE_INT); static void rs6000_output_function_epilogue (FILE *, HOST_WIDE_INT); @@ -20083,10 +20082,37 @@ we restore after the pop when possible. */ #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0 +/* Restoring cr is a two step process: loading a reg from the frame + save, then moving the reg to cr. For ABI_V4 we must let the + unwinder know that the stack location is no longer valid at or + before the stack deallocation, but we can't emit a cfa_restore for + cr at the stack deallocation like we do for other registers. + The trouble is that it is possible for the move to cr to be + scheduled after the stack deallocation. So say exactly where cr + is located on each of the two insns. */ + +static rtx +load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func) +{ + rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset); + rtx reg = gen_rtx_REG (SImode, regno); + rtx insn = emit_move_insn (reg, mem); + + if (!exit_func && DEFAULT_ABI == ABI_V4) +{ + rtx cr = gen_rtx_REG (SImode, CR2_REGNO); + rtx set = gen_rtx_SET (VOIDmode, reg, cr); + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); + RTX_FRAME_RELATED_P (insn) = 1; +} + return reg; +} + /* Reload CR from REG. */ static void -rs6000_restore_saved_cr (rtx reg, int using_mfcr_multiple) +restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func) { int count = 0; int i; @@ -20124,13 +20150,61 @@ else for (i = 0; i < 8; i++) if (df_regs_ever_live_p (CR0_REGNO+i) && ! call_used_regs[CR0_REGNO+i]) - { - emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, - CR0_REGNO+i), - reg)); - } + emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO+i), + reg)); + + if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) +{ + rtx insn = get_last_insn (); + rtx cr = gen_rtx_REG (SImode, CR2_REGNO); + + add_reg_note (insn, REG_CFA_RESTORE, cr); + RTX_FRAME_RELATED_P (insn) = 1; +} } +/* Like cr, the move to lr instruction can be scheduled after the + stack deallocation, but unlike cr, its stack frame save is still + valid. So we only need to emit the cfa_restore on the correct + instruction. */ + +static void +load_lr_save (int regno, rtx frame_reg_rtx, int offset) +{ + rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset); + rtx reg = gen_rtx_REG (Pmode, regno); + + emit_move_insn (reg, mem); +} + +static void +restore_saved_lr (int regno, bool exit_func) +{ + rtx reg = gen_rtx_REG (Pmode, regno); + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + rtx insn = emit_move_insn (lr, reg); + + if (!exit_func && flag_shrink_wrap) +{ + add_reg_note (insn, REG_CFA_RESTORE, lr); + RTX_FRAME_RELATED_P (insn) = 1; +} +} + +static rtx +add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores) +{ + if (info->cr_save_p) +cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR2_REGNO), + cfa_restores); + if (info->lr_save_p) +cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (Pmode, LR_REGNO), + cfa_restores); + return cfa_restores; +} + /* Return true if OFFSET from stack pointer can be clobbered by signals. V.4 doesn't have any stac