Re: [PATCH 3/6] Allow jumps in epilogues
On 04/13/2011 02:38 PM, Bernd Schmidt wrote: > This still requires the i386 output_set_got which I think I can cope > with [...] Patch below, to be applied on top of all the others. Only lightly tested so far beyond standard (fairly useless) regression tests, by comparing generated assembly before/after, for -fpic -march=pentium and core2, for i686-linux and i686-apple-darwin10 (for TARGET_MACHO). I've not found or managed to create a testcase for making MI thunks generate a call to output_set_got. I think it should work, but it's not tested. Bernd * config/i386/i386.c (output_set_got): Don't call dwarf2out_flush_queued_reg_saves. (ix86_reorg): Split set_got patterns. (i386_dwarf_handle_frame_unspec, i386_dwarf_flush_queued_register_saves): New static functions. (TARGET_DWARF_HANDLE_FRAME_UNSPEC, TARGET_DWARF_FLUSH_QUEUED_REGISTER_SAVES): New macros. * config/i386/i386.md (set_got_call, set_got_pop, set_got_add): New patterns. * dwarf2out.c (scan_until_barrier): Use the target's dwarf_flush_queued_register_saves hook. * target.def (dwarf_flush_queued_register_saves): New hook. * doc/tm.texi: Regenerate. Index: gcc/config/i386/i386.c === --- gcc.orig/config/i386/i386.c +++ gcc/config/i386/i386.c @@ -8953,6 +8953,8 @@ output_set_got (rtx dest, rtx label ATTR output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); else { + /* For normal functions, this pattern is split, but we can still +get here for thunks. */ output_asm_insn ("call\t%a2", xops); #ifdef DWARF2_UNWIND_INFO /* The call to next label acts as a push. */ @@ -9010,12 +9012,6 @@ output_set_got (rtx dest, rtx label ATTR get_pc_thunk_name (name, REGNO (dest)); pic_labels_used |= 1 << REGNO (dest); -#ifdef DWARF2_UNWIND_INFO - /* Ensure all queued register saves are flushed before the -call. */ - if (dwarf2out_do_frame ()) - dwarf2out_flush_queued_reg_saves (); -#endif xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); xops[2] = gen_rtx_MEM (QImode, xops[2]); output_asm_insn ("call\t%X2", xops); @@ -30458,6 +30454,56 @@ ix86_reorg (void) with old MDEP_REORGS that are not CFG based. Recompute it now. */ compute_bb_for_insn (); + /* Split any set_got patterns so that we interact correctly with + dwarf2out. */ + if (!TARGET_64BIT && !TARGET_VXWORKS_RTP && !TARGET_DEEP_BRANCH_PREDICTION + && flag_pic) +{ + rtx insn, next; + for (insn = get_insns (); insn; insn = next) + { + rtx pat, label, dest, cst, gotsym, new_insn; + int icode; + + next = NEXT_INSN (insn); + if (!NONDEBUG_INSN_P (insn)) + continue; + + icode = recog_memoized (insn); + if (icode != CODE_FOR_set_got && icode != CODE_FOR_set_got_labelled) + continue; + + extract_insn (insn); + if (icode == CODE_FOR_set_got) + { + label = gen_label_rtx (); + cst = const0_rtx; + } + else + { + label = recog_data.operand[1]; + cst = const1_rtx; + } + + dest = recog_data.operand[0]; + pat = gen_set_got_call (label, cst); + new_insn = emit_insn_before (pat, insn); + RTX_FRAME_RELATED_P (new_insn) = 1; + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (new_insn), 0, 1)) = 1; + gotsym = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); + pat = gen_set_got_pop (dest, label); + new_insn = emit_insn_before (pat, insn); + RTX_FRAME_RELATED_P (new_insn) = 1; + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (new_insn), 0, 1)) = 1; + if (!TARGET_MACHO) + { + pat = gen_set_got_add (dest, gotsym, label); + new_insn = emit_insn_before (pat, insn); + } + delete_insn (insn); + } +} + if (optimize && optimize_function_for_speed_p (cfun)) { if (TARGET_PAD_SHORT_FUNCTION) @@ -30475,6 +30521,30 @@ ix86_reorg (void) move_or_delete_vzeroupper (); } +/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. + This is called from dwarf2out.c to emit call frame instructions + for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */ +static void +i386_dwarf_handle_frame_unspec (rtx pattern ATTRIBUTE_UNUSED, + int index ATTRIBUTE_UNUSED) +{ + gcc_assert (index == UNSPEC_SET_GOT); +} + +/* Handle the TARGET_DWARF_FLUSH_QUEUED_REGISTER_SAVES hook. + This is called from dwarf2out.c to decide whether all queued + register saves should be emitted before INSN. */ +static bool +i386_dwarf_flush_queued_register_saves (rtx insn) +{ + if (!TARGET_VXWORKS_RTP || !flag_pic) +{ + int icode = recog_memoized (insn); + return (
Re: [PATCH 3/6] Allow jumps in epilogues
The final part, an updated version of the old 004-dw2cfg patch. This does much better placement of remember/restore; in almost all cases the code is identical to what we currently generate, modulo minor differences around the PROLOGUE_END label. I've made it emit queued register saves before PROLOGUE_END so that we can use the state there for forced labels. Bernd Index: gcc/dwarf2out.c === --- gcc.orig/dwarf2out.c +++ gcc/dwarf2out.c @@ -465,12 +465,11 @@ static void initial_return_save (rtx); static HOST_WIDE_INT stack_adjust_offset (const_rtx, HOST_WIDE_INT, HOST_WIDE_INT); static void output_cfi (dw_cfi_ref, dw_fde_ref, int); -static void output_cfi_directive (dw_cfi_ref); +static void output_cfi_directive (FILE *, dw_cfi_ref); static void output_call_frame_info (int); static void dwarf2out_note_section_used (void); static bool clobbers_queued_reg_save (const_rtx); static void dwarf2out_frame_debug_expr (rtx); -static void dwarf2out_cfi_begin_epilogue (rtx); static void dwarf2out_frame_debug_restore_state (void); /* Support for complex CFA locations. */ @@ -823,9 +822,6 @@ new_cfi (void) /* The insn after which a new CFI note should be emitted. */ static rtx cfi_insn; -/* True if remember_state should be emitted before following CFI directive. */ -static bool emit_cfa_remember; - /* True if any CFI directives were emitted at the current insn. */ static bool any_cfis_emitted; @@ -868,28 +864,34 @@ dwarf2out_maybe_emit_cfi_label (void) } } +static void +add_cfa_remember (void) +{ + dw_cfi_ref cfi_remember; + + /* Emit the state save. */ + cfi_remember = new_cfi (); + cfi_remember->dw_cfi_opc = DW_CFA_remember_state; + add_fde_cfi (cfi_remember); +} + +/* Nonnull if add_fde_cfi should not just emit a NOTE_INSN_CFI, but + also add the CFI to this vector. */ +static cfi_vec *cfi_insn_vec; + /* Add CFI to the current fde at the PC value indicated by LABEL if specified, or to the CIE if LABEL is NULL. */ static void add_fde_cfi (dw_cfi_ref cfi) { - if (emit_cfa_remember) -{ - dw_cfi_ref cfi_remember; - - /* Emit the state save. */ - emit_cfa_remember = false; - cfi_remember = new_cfi (); - cfi_remember->dw_cfi_opc = DW_CFA_remember_state; - add_fde_cfi (cfi_remember); -} - any_cfis_emitted = true; if (cfi_insn != NULL) { cfi_insn = emit_note_after (NOTE_INSN_CFI, cfi_insn); NOTE_CFI (cfi_insn) = cfi; + if (cfi_insn_vec != NULL) + VEC_safe_push (dw_cfi_ref, gc, *cfi_insn_vec, cfi); } else { @@ -980,12 +982,6 @@ static dw_cfa_location old_cfa; from the CFA. */ static dw_cfa_location cfa_store; -/* The current save location around an epilogue. */ -static dw_cfa_location cfa_remember; - -/* Like cfa_remember, but a copy of old_cfa. */ -static dw_cfa_location old_cfa_remember; - /* The running total of the size of arguments pushed onto the stack. */ static HOST_WIDE_INT args_size; @@ -1339,179 +1335,6 @@ stack_adjust_offset (const_rtx pattern, return offset; } -/* Precomputed args_size for CODE_LABELs and BARRIERs preceeding them, - indexed by INSN_UID. */ - -static HOST_WIDE_INT *barrier_args_size; - -/* Helper function for compute_barrier_args_size. Handle one insn. */ - -static HOST_WIDE_INT -compute_barrier_args_size_1 (rtx insn, HOST_WIDE_INT cur_args_size, -VEC (rtx, heap) **next) -{ - HOST_WIDE_INT offset = 0; - int i; - - if (! RTX_FRAME_RELATED_P (insn)) -{ - if (prologue_epilogue_contains (insn)) - /* Nothing */; - else if (GET_CODE (PATTERN (insn)) == SET) - offset = stack_adjust_offset (PATTERN (insn), cur_args_size, 0); - else if (GET_CODE (PATTERN (insn)) == PARALLEL - || GET_CODE (PATTERN (insn)) == SEQUENCE) - { - /* There may be stack adjustments inside compound insns. Search -for them. */ - for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--) - if (GET_CODE (XVECEXP (PATTERN (insn), 0, i)) == SET) - offset += stack_adjust_offset (XVECEXP (PATTERN (insn), 0, i), -cur_args_size, offset); - } -} - else -{ - rtx expr = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX); - - if (expr) - { - expr = XEXP (expr, 0); - if (GET_CODE (expr) == PARALLEL - || GET_CODE (expr) == SEQUENCE) - for (i = 1; i < XVECLEN (expr, 0); i++) - { - rtx elem = XVECEXP (expr, 0, i); - - if (GET_CODE (elem) == SET && !RTX_FRAME_RELATED_P (elem)) - offset += stack_adjust_offset (elem, cur_args_size, offset); - } - } -} - -#ifndef STACK_GROWS_DOWNWARD - offset = -offset; -#endif - - cur_args_size += offset; - if (cur_args_size < 0) -
Re: [PATCH 3/6] Allow jumps in epilogues
The second part is a new patch, which reduces the amount of different code paths we can take in add_fde_cfi, as this was becoming unmanageable. The concept is to first emit just the CFI notes, in all cases. Later, after we're done producing the CFI insns we need, another pass over the rtl adds the necessary labels and set_loc/advance_loc CFIs. One consequence of this is that def_cfa_1 can no longer use lookup_cfa, so it just compares to an old_cfa variable instead. This also requires target-specific changes as some ports use dwarf2out_cfi_label. An (untested) example of the necessary changes is in config/arm. Bernd --- config/arm/arm.c |5 config/ia64/ia64.c |6 config/sparc/sparc.c |7 config/vax/vax.c |2 dwarf2out.c | 467 --- dwarf2out.h | 32 +++ final.c |5 target.def |2 tree.h | 31 --- 9 files changed, 270 insertions(+), 287 deletions(-) Index: gcc/config/arm/arm.c === --- gcc.orig/config/arm/arm.c +++ gcc/config/arm/arm.c @@ -19977,18 +19977,19 @@ thumb_pushpop (FILE *f, unsigned long ma if (push && pushed_words && dwarf2out_do_frame ()) { - char *l = dwarf2out_cfi_label (false); int pushed_mask = real_regs; + dwarf2out_maybe_emit_cfi_label (); + *cfa_offset += pushed_words * 4; - dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset); + dwarf2out_def_cfa (SP_REGNUM, *cfa_offset); pushed_words = 0; pushed_mask = real_regs; for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1) { if (pushed_mask & 1) - dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset); + dwarf2out_reg_save (regno, 4 * pushed_words++ - *cfa_offset); } } } @@ -20997,10 +20998,9 @@ thumb1_output_function_prologue (FILE *f the stack pointer. */ if (dwarf2out_do_frame ()) { - char *l = dwarf2out_cfi_label (false); - + dwarf2out_maybe_emit_cfi_label (); cfa_offset = cfa_offset + crtl->args.pretend_args_size; - dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset); + dwarf2out_def_cfa (SP_REGNUM, cfa_offset); } } @@ -21046,10 +21046,10 @@ thumb1_output_function_prologue (FILE *f if (dwarf2out_do_frame ()) { - char *l = dwarf2out_cfi_label (false); + dwarf2out_maybe_emit_cfi_label (); cfa_offset = cfa_offset + 16; - dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset); + dwarf2out_def_cfa (SP_REGNUM, cfa_offset); } if (l_mask) @@ -22749,7 +22749,7 @@ arm_except_unwind_info (struct gcc_optio stack alignment. */ static void -arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) +arm_dwarf_handle_frame_unspec (rtx pattern, int index) { rtx unspec = SET_SRC (pattern); gcc_assert (GET_CODE (unspec) == UNSPEC); @@ -22760,8 +22760,7 @@ arm_dwarf_handle_frame_unspec (const cha /* ??? We should set the CFA = (SP & ~7). At this point we haven't put anything on the stack, so hopefully it won't matter. CFA = SP will be correct after alignment. */ - dwarf2out_reg_save_reg (label, stack_pointer_rtx, - SET_DEST (pattern)); + dwarf2out_reg_save_reg (stack_pointer_rtx, SET_DEST (pattern)); break; default: gcc_unreachable (); Index: gcc/config/ia64/ia64.c === --- gcc.orig/config/ia64/ia64.c +++ gcc/config/ia64/ia64.c @@ -330,7 +330,7 @@ static enum machine_mode ia64_promote_fu static void ia64_trampoline_init (rtx, tree, rtx); static void ia64_override_options_after_change (void); -static void ia64_dwarf_handle_frame_unspec (const char *, rtx, int); +static void ia64_dwarf_handle_frame_unspec (rtx, int); static tree ia64_builtin_decl (unsigned, bool); static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t); @@ -9710,9 +9710,7 @@ ia64_dwarf2out_def_steady_cfa (rtx insn, processing. The real CFA definition is set up above. */ static void -ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label), - rtx ARG_UNUSED (pattern), - int index) +ia64_dwarf_handle_frame_unspec (rtx ARG_UNUSED (pattern), int index) { gcc_assert (index == UNSPECV_ALLOC); } Index: gcc/config/sparc/sparc.c === --- gcc.orig/config/sparc/sparc.c +++ gcc/config/sparc/sparc.c @@ -454,7 +454,7 @@ static unsigned int sparc_function_arg_b const_tree); static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, tree, bool); -static void sparc_dwarf_handle_frame_unspec (const cha
Re: [PATCH 3/6] Allow jumps in epilogues
On 04/13/2011 04:14 PM, Bernd Schmidt wrote: > On 04/11/2011 07:10 PM, Richard Henderson wrote: >> Ok. > > Did you receive my reply to this message from earlier today? It doesn't > seem to have made it to gcc-patches yet. Since gcc-patches appears to have dropped the message, I'll resend it in three parts. There are three patches here, but they must be applied together (things will mostly work otherwise, but I expect -freorder-blocks-and-partition is broken in the intermediate stages). Below is the ChangeLog for the entire set, and the first of the patches. This is just a new version of the previously posted 002-scanfirst patch, now changed to delete the CFI notes afterwards in order to avoid -fcompare-debug failures. Bernd * target.def (dwarf_handle_frame_unspec): Remove label argument. * doc/tm.texi: Regenerate. * tree.h (dwarf2out_cfi_label, dwarf2out_def_cfa, dwarf2out_window_save, dwarf2out_reg_save, dwarf2out_return_save, dwarf2out_return_reg, dwarf2out_reg_save_reg): Don't declare. * final.c (final_start_function): Call dwarf2out_frame_debug_after_prologue. (final_scan_insn): Don't call dwarf2out_frame_debug for anything. Handle NOTE_INSN_CFI and NOTE_INSN_CFI_LABEL. (final): Delete these notes. * insn-notes.def (CFI, CFI_LABEL): New. * jump.c (addr_vec_p): New function. * dwarf2out.c (cfi_insn): New static variable. (dwarf2out_cfi_label): Remove force argument. All callers changed. Only generate the label, don't emit it. (dwarf2out_maybe_emit_cfi_label): New function. (add_fde_cfi): Remove label argument. All callers changed. Remove most code; leave a condition to either emit a CFI insn, or add the CFI to the FDE CFI vector. (add_cie_cfi): New static function. (add_cfi): Remove function. (old_cfa): New static variable. (cfa_remember): Remove static variable. (dwarf2out_def_cfa): Replace label argument with a bool for_cie argument. All callers changed. Don't use lookup_cfa; use and update the global old_cfa variable. Call add_fde_cfi or add_cie_cfi at the end. (reg_save): Replace label argument with a bool. All callers changed. Call add_fde_cfi or add_cie_cfi at the end. (dwarf2out_reg_save, dwarf2out_return_save, dwarf2out_return_reg, dwarf2out_args_szie, dwarf2out_stack_adjust, dwarf2out_reg_save_reg, dwarf2out_frame_debug_def_cfa, dwarf2out_frame_debug_cfa_offset, dwarf2out_frame_debug_cfa_register, dwarf2out_frame_debug_cfa_restore, dwarf2out_frame_debug_cfa_expression, dwarf2out_frame_debug_expr): Remove label argument. All callers changed. (barrier_args_size): Remove variable. (compute_barrier_args_size_1, compute_barrier_args_size): Remove functions. (dwarf2out_notice_stack_adjust): Don't handle barriers. (last_reg_save_label): Remove variable. All sets and uses removed. (cfi_label_required_p, add_cfis_to_fde): New static functions. (dwarf2out_frame_debug_restore_state): Simply add the new CFI. (dwarf2out_frame_debug): Set cfi_insn, and clear it. Don't call dwarf2out_flush_queued_reg_saves at the top. (dwarf2out_frame_debug_init): Initialize old_cfa. (copy_cfi_vec_parts): New static function. (jump_target_info): New struct type. (dwarf2out_cfi_begin_epilogue): Remove. (save_point_p, record_current_state, maybe_record_jump_target, vec_is_prefix_of, append_extra_cfis, debug_cfi_vec, switch_note_p, scan_until_barrier, find_best_starting_point): New static functions. (dwarf2out_frame_debug_after_prologue): New function. (dwarf2out_emit_cfi): New function. (output_cfi_directive): New FILE argument. All callers changed. Avoid some paths if it is not asm_out_file; otherwise print to it. (output_all_cfis): Remove function. (output_cfis): Remove do_cfi_asm arg. All callers changed. Never call output_cfi_directive. (dwarf2out_frame_init): Initialize old_cfa. (dwarf2out_switch_text_section): Don't initialize dw_fde_current_label. Don't call output_all_cfis. * dwarf2out.h (dwarf2out_cfi_label, dwarf2out_def_cfa, dwarf2out_window_save, dwarf2out_reg_save, dwarf2out_return_save, dwarf2out_return_reg, dwarf2out_reg_save_reg, dwarf2out_emit_cfi, dwarf2out_frame_debug_after_prologue): Declare. (dwarf2out_cfi_begin_epilogue, dwarf2out_frame_debug_restore_state): Don't declare. (struct dw_cfi_struct): Add forward declaration. * rtl.h (union rtunion_def): Add rt_cfi member. (XCFI, XCCFI, NOTE_CFI, NOTE_LABEL_NUMBER): New macros. (addr_vec_p): Declare. * config/sparc/sparc.c (sparc_dwarf_handle_frame_unspec): Remove
Re: [PATCH 3/6] Allow jumps in epilogues
On Wed, Apr 13, 2011 at 07:44:26AM -0700, Richard Henderson wrote: > Yeah, while I was working on dwarf line numbers recently, I found that > just about the only thing that would produce anything interesting was > a profiled bootstrap. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48253#c1 is what I've been using when I touched dwarf2out recently. Jakub
Re: [PATCH 3/6] Allow jumps in epilogues
On 04/13/2011 05:38 AM, Bernd Schmidt wrote: > This bootstraps and tests ok on i686-linux. However, there is work left > to be done. Can I take you up on your offer to work with me on this? > This still requires the i386 output_set_got which I think I can cope > with, but the ia64 backend does a number of things with unwinding that I > don't understand. Also, I'll be away the next two weeks - if you arrive > at a complete version during that time it would be great if you could > commit it. Ok, I'll put this on my to-do list. > One thing to note is that it seems surprisingly hard to make > -freorder-blocks-and-partition do anything interesting. There's one C++ > testcase (partition2.C I think) which I used to debug this code, but > other than that I haven't really found anything that actually generates > two nonempty partitions. Yeah, while I was working on dwarf line numbers recently, I found that just about the only thing that would produce anything interesting was a profiled bootstrap. r~
Re: [PATCH 3/6] Allow jumps in epilogues
On 04/11/2011 07:10 PM, Richard Henderson wrote: > Ok. Did you receive my reply to this message from earlier today? It doesn't seem to have made it to gcc-patches yet. Bernd
Re: [PATCH 3/6] Allow jumps in epilogues
On 04/05/2011 02:58 PM, Bernd Schmidt wrote: > * dwarf2out.c (struct dw_cfi_struct): Remove member dw_cfi_next. > (dw_cfi_ref): Add DEF_VEC_P and some DEF_VEC_ALLOC_Ps. > (cfi_vec): New typedef. > (struct dw_fde_struct): Make dw_fde_cfi a cfi_vec. Replace > dw_fde_switch_cfi with an integer dw_fde_switch_cfi_index. > (cie_cfi_vec): New static variable. > (cie_cfi_head): Delete. > (add_cfi): Accept a cfi_vec * as first argument. All callers and > declaration changed. Use vector rather than list operations. > (new_cfi): Don't initialize the dw_cfi_next field. > (add_fde_cfi): Allocate cie_cfi_vec if necessary. Use vector > rather than list operations. > (lookup_cfa): Use vector rather than list operations. > (output_cfis): New argument upto. Accept a cfi_vec rather than > a dw_cfi_ref list head as argument. All callers changed. > Iterate over the vector using upto as a maximum index. > (output_all_cfis): New static function. > (output_fde): Use vector rather than list operations. Use the > new upto argument for output_cfis rather than manipulating a > list. > (dwarf2out_begin_prologue): Change initializations to match > new struct members. > (dwarf2out_switch_text_section): Initialize dw_fde_switch_cfi_index > from the vector length rather than searching for the end of a list. > Use output_all_cfis. > (convert_cfa_to_fb_loc_list): Use vector rather than list operations. Ok. r~
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/31/2011 11:28 PM, Richard Henderson wrote: >> 003 - Store dw_cfi_refs in VECs rather than linked lists. Looks >> larger than it is due to reindentation > > Like 001, this one looks like it's totally independent of and of > the other changes, and a good cleanup. Please go ahead and test > and commit this one independently. Here's a new version - the code had changed underneath me in the meantime, and I had some off-by-one errors involving the switch_index. Bootstrapped and tested on i686-linux; I've also built my set of testcases with -freorder-blocks-and-partition without code generation changes from before to after. Ok? Bernd * dwarf2out.c (struct dw_cfi_struct): Remove member dw_cfi_next. (dw_cfi_ref): Add DEF_VEC_P and some DEF_VEC_ALLOC_Ps. (cfi_vec): New typedef. (struct dw_fde_struct): Make dw_fde_cfi a cfi_vec. Replace dw_fde_switch_cfi with an integer dw_fde_switch_cfi_index. (cie_cfi_vec): New static variable. (cie_cfi_head): Delete. (add_cfi): Accept a cfi_vec * as first argument. All callers and declaration changed. Use vector rather than list operations. (new_cfi): Don't initialize the dw_cfi_next field. (add_fde_cfi): Allocate cie_cfi_vec if necessary. Use vector rather than list operations. (lookup_cfa): Use vector rather than list operations. (output_cfis): New argument upto. Accept a cfi_vec rather than a dw_cfi_ref list head as argument. All callers changed. Iterate over the vector using upto as a maximum index. (output_all_cfis): New static function. (output_fde): Use vector rather than list operations. Use the new upto argument for output_cfis rather than manipulating a list. (dwarf2out_begin_prologue): Change initializations to match new struct members. (dwarf2out_switch_text_section): Initialize dw_fde_switch_cfi_index from the vector length rather than searching for the end of a list. Use output_all_cfis. (convert_cfa_to_fb_loc_list): Use vector rather than list operations. Index: dwarf2out.c === --- dwarf2out.c (revision 171839) +++ dwarf2out.c (working copy) @@ -267,7 +267,6 @@ typedef union GTY(()) dw_cfi_oprnd_struc dw_cfi_oprnd; typedef struct GTY(()) dw_cfi_struct { - dw_cfi_ref dw_cfi_next; enum dwarf_call_frame_info dw_cfi_opc; dw_cfi_oprnd GTY ((desc ("dw_cfi_oprnd1_desc (%1.dw_cfi_opc)"))) dw_cfi_oprnd1; @@ -276,6 +275,12 @@ typedef struct GTY(()) dw_cfi_struct { } dw_cfi_node; +DEF_VEC_P (dw_cfi_ref); +DEF_VEC_ALLOC_P (dw_cfi_ref, heap); +DEF_VEC_ALLOC_P (dw_cfi_ref, gc); + +typedef VEC(dw_cfi_ref, gc) *cfi_vec; + /* This is how we define the location of the CFA. We use to handle it as REG + OFFSET all the time, but now it can be more complex. It can now be either REG + CFA_OFFSET or *(REG + BASE_OFFSET) + CFA_OFFSET. @@ -304,8 +309,8 @@ typedef struct GTY(()) dw_fde_struct { const char *dw_fde_vms_begin_epilogue; const char *dw_fde_second_begin; const char *dw_fde_second_end; - dw_cfi_ref dw_fde_cfi; - dw_cfi_ref dw_fde_switch_cfi; /* Last CFI before switching sections. */ + cfi_vec dw_fde_cfi; + int dw_fde_switch_cfi_index; /* Last CFI before switching sections. */ HOST_WIDE_INT stack_realignment; unsigned funcdef_number; /* Dynamic realign argument pointer register. */ @@ -410,8 +415,8 @@ current_fde (void) return fde_table_in_use ? &fde_table[fde_table_in_use - 1] : NULL; } -/* A list of call frame insns for the CIE. */ -static GTY(()) dw_cfi_ref cie_cfi_head; +/* A vector of call frame insns for the CIE. */ +static GTY(()) cfi_vec cie_cfi_vec; /* Some DWARF extensions (e.g., MIPS/SGI) implement a subprogram attribute that accelerates the lookup of the FDE associated @@ -451,7 +456,7 @@ static GTY(()) section *cold_text_sectio static char *stripattributes (const char *); static const char *dwarf_cfi_name (unsigned); static dw_cfi_ref new_cfi (void); -static void add_cfi (dw_cfi_ref *, dw_cfi_ref); +static void add_cfi (cfi_vec *, dw_cfi_ref); static void add_fde_cfi (const char *, dw_cfi_ref); static void lookup_cfa_1 (dw_cfi_ref, dw_cfa_location *, dw_cfa_location *); static void lookup_cfa (dw_cfa_location *); @@ -807,7 +812,6 @@ new_cfi (void) { dw_cfi_ref cfi = ggc_alloc_dw_cfi_node (); - cfi->dw_cfi_next = NULL; cfi->dw_cfi_oprnd1.dw_cfi_reg_num = 0; cfi->dw_cfi_oprnd2.dw_cfi_reg_num = 0; @@ -817,9 +821,8 @@ new_cfi (void) /* Add a Call Frame Instruction to list of instructions. */ static inline void -add_cfi (dw_cfi_ref *list_head, dw_cfi_ref cfi) +add_cfi (cfi_vec *vec, dw_cfi_ref cfi) { - dw_cfi_ref *p; dw_fde_ref fde = current_fde (); /* When DRAP is used, CFA is defined with an expression. Redefine @@ -841,11 +844,7 @@ add_cfi (dw_cfi_ref *list_head, dw_cfi_r
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/31/2011 03:07 PM, Bernd Schmidt wrote: > No, it's used - but it looks like I forgot to quilt refresh and the > final.c changes weren't included. New patch below. After this patch, the > whole function is processed before final, and rather than emitting cfi > directives immediately, we create these notes which cause the directives > to be emitted during final. Ah, much better. I had wondered what I was missing. > This probably shouldn't be committed separately when these changes go > in, as (I think) it breaks -freorder-blocks-and-partition as well as the > code in i386.c; it's split out simply to show an intermediate stage. Sure. > Yes, this falls under "inefficient CFI insns". I wanted to post a > preliminary proof-of-concept patch set now which generates > correct(-looking) output, but not necessarily optimized output. Not > quite sure yet how to tackle this but I'll think of something. Ok, I'll go ahead and apply all the patches locally and see what the output actually looks like. Perhaps I'll have more suggestions. r~
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/31/2011 11:28 PM, Richard Henderson wrote: >> Rather than use a CFG, I've tried to do something similar to >> compute_barrier_args_size, using JUMP_LABELs etc. > > A reasonable solution for now, I suppose. Ok, sounds like you haven't discovered a fatal flaw; I'll keep hacking on it. >> Summary of the patches: >> 001 - just create a dwarf2out_frame_debug_init function. > > Ok. > >> 003 - Store dw_cfi_refs in VECs rather than linked lists. Looks >> larger than it is due to reindentation > > Like 001, this one looks like it's totally independent of and of > the other changes, and a good cleanup. Please go ahead and test > and commit this one independently. Will do. >> 002 - Make it walk the function in a first pass and record CFIs to >> be output later > > Do I correctly understand that NOTE_INSN_CFI isn't actually being > used in this patch? No, it's used - but it looks like I forgot to quilt refresh and the final.c changes weren't included. New patch below. After this patch, the whole function is processed before final, and rather than emitting cfi directives immediately, we create these notes which cause the directives to be emitted during final. This probably shouldn't be committed separately when these changes go in, as (I think) it breaks -freorder-blocks-and-partition as well as the code in i386.c; it's split out simply to show an intermediate stage. >> 004 - Change the function walk introduced in 002 so that it records >> and restores state when reaching jumps/barriers > > I'm not too fond of vec_is_prefix_of. The Problem is that you're > not applying any understanding of the actual data, just doing a > string comparison (effectively). Yes, this falls under "inefficient CFI insns". I wanted to post a preliminary proof-of-concept patch set now which generates correct(-looking) output, but not necessarily optimized output. Not quite sure yet how to tackle this but I'll think of something. Bernd * cfgcleanup.c (flow_find_head_matching_sequence): Ignore epilogue notes. * df-problems.c (can_move_insns_across): Don't stop at epilogue notes. * dwarf2out.c (dwarf2out_cfi_begin_epilogue): Also allow a simplejump to end the block. Index: gcc/dwarf2out.c === --- gcc.orig/dwarf2out.c +++ gcc/dwarf2out.c @@ -471,6 +471,8 @@ static void output_call_frame_info (int) static void dwarf2out_note_section_used (void); static bool clobbers_queued_reg_save (const_rtx); static void dwarf2out_frame_debug_expr (rtx, const char *); +static void dwarf2out_cfi_begin_epilogue (rtx); +static void dwarf2out_frame_debug_restore_state (void); /* Support for complex CFA locations. */ static void output_cfa_loc (dw_cfi_ref, int); @@ -879,6 +881,9 @@ dwarf2out_cfi_label (bool force) return label; } +/* The insn after which a new CFI note should be emitted. */ +static rtx cfi_insn; + /* True if remember_state should be emitted before following CFI directive. */ static bool emit_cfa_remember; @@ -961,7 +966,8 @@ add_fde_cfi (const char *label, dw_cfi_r } } - output_cfi_directive (cfi); + cfi_insn = emit_note_after (NOTE_INSN_CFI, cfi_insn); + NOTE_CFI (cfi_insn) = cfi; list_head = &fde->dw_fde_cfi; any_cfis_emitted = true; @@ -2790,6 +2796,11 @@ dwarf2out_frame_debug (rtx insn, bool af rtx note, n; bool handled_one = false; + if (after_p) +cfi_insn = insn; + else +cfi_insn = PREV_INSN (insn); + if (!NONJUMP_INSN_P (insn) || clobbers_queued_reg_save (insn)) dwarf2out_flush_queued_reg_saves (); @@ -2911,6 +2922,7 @@ void dwarf2out_frame_debug_init (void) { size_t i; + rtx insn; /* Flush any queued register saves. */ dwarf2out_flush_queued_reg_saves (); @@ -2937,12 +2949,64 @@ dwarf2out_frame_debug_init (void) XDELETEVEC (barrier_args_size); barrier_args_size = NULL; } + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) +{ + rtx pat; + if (BARRIER_P (insn)) + { + dwarf2out_frame_debug (insn, false); + continue; + } + else if (NOTE_P (insn)) + { + switch (NOTE_KIND (insn)) + { + case NOTE_INSN_EPILOGUE_BEG: +#if defined (HAVE_epilogue) + dwarf2out_cfi_begin_epilogue (insn); +#endif + break; + case NOTE_INSN_CFA_RESTORE_STATE: + cfi_insn = insn; + dwarf2out_frame_debug_restore_state (); + break; + } + continue; + } + if (!NONDEBUG_INSN_P (insn)) + continue; + pat = PATTERN (insn); + if (asm_noperands (pat) >= 0) + continue; + if (GET_CODE (pat) == SEQUENCE) + { + int j; + for (j = 1; j < XVECLEN (pat, 0); j++) + dwarf2out_frame_debug (XVECEXP (pat, 0, j), false); + insn = XVECEXP (pat, 0
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/31/2011 12:59 PM, Bernd Schmidt wrote: >> So long as late late compilation passes continue to not move frame-related >> insns across basic block boundaries, we should be fine. > > I'm nervous about this as the reorg pass can do arbitrary > transformations. On Blackfin for example, we can reorder basic blocks > for the sake of loop optimizations; sched-ebb can create new blocks, > etc. I think it would be best if we can somehow make it work during > final, without a CFG. I guess that's the best thing for now. I'm sure we all agree that long term all transformations should preserve the CFG all the way through final. At which point this could be implemented as a pass on the function after all transformations are complete. > Rather than use a CFG, I've tried to do something similar to > compute_barrier_args_size, using JUMP_LABELs etc. A reasonable solution for now, I suppose. > > Summary of the patches: > 001 - just create a dwarf2out_frame_debug_init function. Ok. > 002 - Make it walk the function in a first pass and record CFIs to > be output later Do I correctly understand that NOTE_INSN_CFI isn't actually being used in this patch? > 003 - Store dw_cfi_refs in VECs rather than linked lists. Looks > larger than it is due to reindentation Like 001, this one looks like it's totally independent of and of the other changes, and a good cleanup. Please go ahead and test and commit this one independently. > 004 - Change the function walk introduced in 002 so that it records > and restores state when reaching jumps/barriers I'm not too fond of vec_is_prefix_of. The Problem is that you're not applying any understanding of the actual data, just doing a string comparison (effectively). Imagine two code paths A and B that both save R2 and R3 into their respective stack slots. Imagine that -- for whatever reason -- the stores have been scheduled differently such that on path A R2 is saved before R3, and the reverse on path B. Your prefix test will conclude that paths A and B end with different unwind info, even though they are in fact compatible. Using some mechanism by which we can compare aggregate CFI information on a per-register basis ought to also vastly improve the efficiency in adjusting the cfi info between code points. It should also enable proper information in the -freorder-blocks-and-partition case. > * i386.c uses dwarf2out_frame_debug directly in some cases and is > unconverted Hum. I wonder what the best way to attack this. It's a local change, adjusting and then restoring the unwind state between two insns that should not be scheduled separately. We could turn them into two unspec_volatiles, and lose scheduling across this pattern. But ideally this is a value that ought to be shrink-wrapped. It's expensive to compute, and there are many early-return situations in which we don't need it. I suppose we could split this pattern manually in i386 reorg; forcing this to be split before final even at -O0. At that point all shrink-wrapping would be done and an unspecv replacement would be ok. > * I haven't tested whether my attempt to use > get_eh_landing_pad_from_rtx in the absence of a CFG actually works It will. This information is stored in cfun->eh. By design this information must survive until final, so that we can emit the actual eh info into the appropriate tables. > * Computed jumps and nonlocal gotos aren't handled. I think this > could be done by recording the state at NOTE_INSN_PROLOGUE_END > and using that for all labels we can't otherwise reach. That should be reasonable. You could assert that all of these labels are in forced_labels. All computed branch targets should be listed therein. r~
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/26/2011 04:26 AM, Richard Henderson wrote: > I think the ideal thing would be a pass while the cfg is still extant that > captures the unwind info into notes; these can be recorded at basic block > boundaries, so that they persist until the end of compilation. > > So long as late late compilation passes continue to not move frame-related > insns across basic block boundaries, we should be fine. I'm nervous about this as the reorg pass can do arbitrary transformations. On Blackfin for example, we can reorder basic blocks for the sake of loop optimizations; sched-ebb can create new blocks, etc. I think it would be best if we can somehow make it work during final, without a CFG. > I'm willing to work with you on the problem of cfg-aware unwind info. We > have needed this for a really long time; there are existing bugs related > to exception handling and !ACCUMULATE_OUTGOING_ARGS that would be fixed by > this. I'm appending a series of draft patches. It tries to compute CFIs for stretches of straight-line code and records state for potential jump targets, iterating until everything is covered. Then, a pass through all insns from start to finish reassembles the pieces into a coherent string of CFI insns. Rather than use a CFG, I've tried to do something similar to compute_barrier_args_size, using JUMP_LABELs etc. Summary of the patches: 001 - just create a dwarf2out_frame_debug_init function. 002 - Make it walk the function in a first pass and record CFIs to be output later 003 - Store dw_cfi_refs in VECs rather than linked lists. Looks larger than it is due to reindentation 004 - Change the function walk introduced in 002 so that it records and restores state when reaching jumps/barriers For now I'd just like some input on whether this looks remotely viable. There are a number of known problems with it: * The generated CFIs are inefficient (poor use of remember/restore) * -freorder-blocks-and-partition is broken * i386.c uses dwarf2out_frame_debug directly in some cases and is unconverted * I haven't tested whether my attempt to use get_eh_landing_pad_from_rtx in the absence of a CFG actually works * Computed jumps and nonlocal gotos aren't handled. I think this could be done by recording the state at NOTE_INSN_PROLOGUE_END and using that for all labels we can't otherwise reach. Bernd Index: gcc/dwarf2out.c === --- gcc.orig/dwarf2out.c +++ gcc/dwarf2out.c @@ -2790,38 +2790,6 @@ dwarf2out_frame_debug (rtx insn, bool af rtx note, n; bool handled_one = false; - if (insn == NULL_RTX) -{ - size_t i; - - /* Flush any queued register saves. */ - dwarf2out_flush_queued_reg_saves (); - - /* Set up state for generating call frame debug info. */ - lookup_cfa (&cfa); - gcc_assert (cfa.reg - == (unsigned long)DWARF_FRAME_REGNUM (STACK_POINTER_REGNUM)); - - cfa.reg = STACK_POINTER_REGNUM; - cfa_store = cfa; - cfa_temp.reg = -1; - cfa_temp.offset = 0; - - for (i = 0; i < num_regs_saved_in_regs; i++) - { - regs_saved_in_regs[i].orig_reg = NULL_RTX; - regs_saved_in_regs[i].saved_in_reg = NULL_RTX; - } - num_regs_saved_in_regs = 0; - - if (barrier_args_size) - { - XDELETEVEC (barrier_args_size); - barrier_args_size = NULL; - } - return; -} - if (!NONJUMP_INSN_P (insn) || clobbers_queued_reg_save (insn)) dwarf2out_flush_queued_reg_saves (); @@ -2939,6 +2907,38 @@ dwarf2out_frame_debug (rtx insn, bool af dwarf2out_flush_queued_reg_saves (); } +void +dwarf2out_frame_debug_init (void) +{ + size_t i; + + /* Flush any queued register saves. */ + dwarf2out_flush_queued_reg_saves (); + + /* Set up state for generating call frame debug info. */ + lookup_cfa (&cfa); + gcc_assert (cfa.reg + == (unsigned long)DWARF_FRAME_REGNUM (STACK_POINTER_REGNUM)); + + cfa.reg = STACK_POINTER_REGNUM; + cfa_store = cfa; + cfa_temp.reg = -1; + cfa_temp.offset = 0; + + for (i = 0; i < num_regs_saved_in_regs; i++) +{ + regs_saved_in_regs[i].orig_reg = NULL_RTX; + regs_saved_in_regs[i].saved_in_reg = NULL_RTX; +} + num_regs_saved_in_regs = 0; + + if (barrier_args_size) +{ + XDELETEVEC (barrier_args_size); + barrier_args_size = NULL; +} +} + /* Determine if we need to save and restore CFI information around this epilogue. If SIBCALL is true, then this is a sibcall epilogue. If we do need to save/restore, then emit the save now, and insert a Index: gcc/dwarf2out.h === --- gcc.orig/dwarf2out.h +++ gcc/dwarf2out.h @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. extern void dwarf2out_decl (tree); extern void dwarf2out_frame_debug (rtx, bool); +extern void dwarf2out_frame_debug_init (void); extern void dwarf2out_cfi_begin_epilogue
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/25/2011 10:34 AM, Bernd Schmidt wrote: > I don't know much about the unwinding code. I'm currently thinking about > writing out a cfi_remember_state at the start of the function, restoring > that clean state when necessary at the start of a new block and emitting > the necessary directives to reach the correct state. What directives > should I expect to be required? Can I get by just with cfi_offset and > cfi_def_cfa_offset, or will something else be necessary? Yes, several things: register, expression, gnu_args_size, perhaps a few more. I think the ideal thing would be a pass while the cfg is still extant that captures the unwind info into notes; these can be recorded at basic block boundaries, so that they persist until the end of compilation. So long as late late compilation passes continue to not move frame-related insns across basic block boundaries, we should be fine. Irritatingly, the exact place to locate this pass is difficult to pin down. Immediately before md_reorg is the last place we have the cfg. But we do strange things in, e.g. ia64 where we rebuild the cfg and run sched_ebb during md_reorg. Of course, ia64 is a bad example because its unwind info is target-specific, and quite a lot of the possible benefit of shrink wrapping is lost via the register windowing. I'm willing to work with you on the problem of cfg-aware unwind info. We have needed this for a really long time; there are existing bugs related to exception handling and !ACCUMULATE_OUTGOING_ARGS that would be fixed by this. r~
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/23/2011 06:19 PM, Richard Henderson wrote: > In general, with shrink-wrapping, we can have essentially arbitrary > differences in unwind info between blocks that are sequential. So, while that isn't the case just yet with the current shrink-wrapping patch, it seems I will either have to make dwarf2out fully general, or ensure that basic blocks occur in only a certain order (the prologue must be written out only after all basic blocks that can be executed before or without reaching it). I don't know much about the unwinding code. I'm currently thinking about writing out a cfi_remember_state at the start of the function, restoring that clean state when necessary at the start of a new block and emitting the necessary directives to reach the correct state. What directives should I expect to be required? Can I get by just with cfi_offset and cfi_def_cfa_offset, or will something else be necessary? Bernd
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/23/2011 06:27 PM, Richard Henderson wrote: > On 03/23/2011 10:22 AM, Bernd Schmidt wrote: >> On 03/23/2011 06:19 PM, Richard Henderson wrote: >>> body >>> body >>> restore r1 XXX >>> restore r2 XXX >>> jmp L2 XXX >>> >>> L1: bodyYYY >>> bodyYYY >>> restore r2 >>> >>> L2: restore r3 >>> return >> >>> In general, with shrink-wrapping, we can have essentially arbitrary >>> differences in unwind info between blocks that are sequential. >> >> I don't think this can actually happen with the current implementation. >> There is only one prologue, and all epilogues (the normal one and the >> sibcall epilogues) match it exactly. I don't believe we can generate >> code as in the example above, both before and after my patch. > > Um.. then what's this "allow jumps in epilogues" thing of which you speak? > If there's a jump, then it goes somewhere, and branches over something. > I see no constraints on what that something might be. > > Could you give an example of a transformation that is allowed by this? The idea was to be able to share a single return instruction between epilogue/non-epilogue return paths, so that e.g. on i686 a conditional return could be implemented as a conditional jump to a common return insn. The allow-jumps patch then becomes necessary because bbro can move the blocks around. It does seem, however, that bbro can in fact cause problems for the unwind information when the prologue is no longer in the first block. Let me try to come up with a solution for that. Bernd
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/23/2011 10:22 AM, Bernd Schmidt wrote: > On 03/23/2011 06:19 PM, Richard Henderson wrote: >> body >> body >> restore r1 XXX >> restore r2 XXX >> jmp L2 XXX >> >> L1: bodyYYY >> bodyYYY >> restore r2 >> >> L2: restore r3 >> return > >> In general, with shrink-wrapping, we can have essentially arbitrary >> differences in unwind info between blocks that are sequential. > > I don't think this can actually happen with the current implementation. > There is only one prologue, and all epilogues (the normal one and the > sibcall epilogues) match it exactly. I don't believe we can generate > code as in the example above, both before and after my patch. Um.. then what's this "allow jumps in epilogues" thing of which you speak? If there's a jump, then it goes somewhere, and branches over something. I see no constraints on what that something might be. Could you give an example of a transformation that is allowed by this? r~
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/23/2011 06:19 PM, Richard Henderson wrote: > body > body > restore r1 XXX > restore r2 XXX > jmp L2 XXX > > L1: bodyYYY > bodyYYY > restore r2 > > L2: restore r3 > return > In general, with shrink-wrapping, we can have essentially arbitrary > differences in unwind info between blocks that are sequential. I don't think this can actually happen with the current implementation. There is only one prologue, and all epilogues (the normal one and the sibcall epilogues) match it exactly. I don't believe we can generate code as in the example above, both before and after my patch. Bernd
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/23/2011 09:48 AM, Bernd Schmidt wrote: >> With no more code than this, I cannot believe you're generating correct >> unwind info anymore. > > Why not? Are you worried about the code at the destination of the jump? > That should be preceded by another block falling through into it which > also has a NOTE_INSN_EPILOGUE_BEG. > > If that isn't the problem you have in mind, what is and how can we test > for it? body body restore r1 XXX restore r2 XXX jmp L2 XXX L1: bodyYYY bodyYYY restore r2 L2: restore r3 return Assume for the moment that "restore" on this target is something that can't be delayed or repeated. E.g. a pop, rather than a move which leaves the saved value in memory at a unknown offset from the CFA. This means we have to emit unwind directives immediately after the restore insn and cannot delay the epilogue unwind until we deallocate the entire stack frame. This means that your patch either gets the unwind info wrong for the XXX sequence or the YYY sequence. Correct unwind info would look like body body .cfi_remember_state restore r1 .cfi_restore r1 restore r2 .cfi_restore r2 jmp L2 .cfi_restore_state L1: body body restore r2 .cfi_restore r2 L2: // validate the unwind info across the CFG making sure that the incoming // edges contain the same unwind info here. restore r3 .cfi_restore r3 return In general, with shrink-wrapping, we can have essentially arbitrary differences in unwind info between blocks that are sequential. We have to be prepared to fully adjust the unwind state between blocks. Assume a { x } is the set of registers saved into the stack frame in a given block. We have both incoming and outgoing sets. foo:// in: { } cmp r1,r2 jne L1 // out: { } L0: // in: { } save r8 save r9 body ... // out: { r8, r9 } L2: // in: { r8, r9, r10 } body body ... // out: { r8, r9, r10 } L1: // in: { } save r8 save r9 save r10 body ... // out: { r8, r9, r10 } L3: // in: { r8, r9, r10 } restore r10 // out: { r8, r9 } L4: // in: { r8, r9 } restore r9 restore r8 return This layout requires more than just .cfi_remember_state/restore_state between blocks. We have to be prepared to emit full unwind info at any point. Assume cfi info marked with XXX exists between basic blocks to fixup the transition points: L0: save r8 save r9 .cfi_offset r8,x .cfi_offset r9,y body .cfi_offset r10,z XXX L2: body body .cfi_restore r8 XXX .cfi_restore r9 XXX .cfi_restore r10XXX L1: save r8 save r9 save r10 .cfi_offset r8,x .cfi_offset r9,y .cfi_offset r10,z body If this isn't clear, please ask questions. The problem of unwinding is way more complicated than what you appear to be assuming. r~
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/23/2011 05:46 PM, Richard Henderson wrote: > On 03/23/2011 07:50 AM, Bernd Schmidt wrote: >> dwarf2out has code that starts scanning from NOTE_INSN_EPILOGUE_BEG >> until it finds the return jump. When there is common code in several >> blocks ending in a return, we might want to share this, and in that case >> it would be possible to encounter a simplejump rather than a returnjump. >> This should be safe, and the following patch allows it. > > With no more code than this, I cannot believe you're generating correct > unwind info anymore. Why not? Are you worried about the code at the destination of the jump? That should be preceded by another block falling through into it which also has a NOTE_INSN_EPILOGUE_BEG. If that isn't the problem you have in mind, what is and how can we test for it? Bernd
Re: [PATCH 3/6] Allow jumps in epilogues
On 03/23/2011 07:50 AM, Bernd Schmidt wrote: > dwarf2out has code that starts scanning from NOTE_INSN_EPILOGUE_BEG > until it finds the return jump. When there is common code in several > blocks ending in a return, we might want to share this, and in that case > it would be possible to encounter a simplejump rather than a returnjump. > This should be safe, and the following patch allows it. With no more code than this, I cannot believe you're generating correct unwind info anymore. It would be possible to handle code merging including epilogue blocks if (and IMO only if) you track unwind state on a per-block basis, and propagate this information around the CFG, finally linearizing this when blocks are re-ordered for the last time before final. At present, sadly, we assume steady-state for the unwind info except before PROLOGUE_END and after EPILOGUE_BEG. r~
[PATCH 3/6] Allow jumps in epilogues
dwarf2out has code that starts scanning from NOTE_INSN_EPILOGUE_BEG until it finds the return jump. When there is common code in several blocks ending in a return, we might want to share this, and in that case it would be possible to encounter a simplejump rather than a returnjump. This should be safe, and the following patch allows it. Bernd * cfgcleanup.c (flow_find_head_matching_sequence): Ignore epilogue notes. * df-problems.c (can_move_insns_across): Don't stop at epilogue notes. * dwarf2out.c (dwarf2out_cfi_begin_epilogue): Also allow a simplejump to end the block. Index: gcc/cfgcleanup.c === --- gcc.orig/cfgcleanup.c +++ gcc/cfgcleanup.c @@ -1184,20 +1184,12 @@ flow_find_head_matching_sequence (basic_ while (true) { - /* Ignore notes, except NOTE_INSN_EPILOGUE_BEG. */ + /* Ignore notes. */ while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1)) - { - if (NOTE_P (i1) && NOTE_KIND (i1) == NOTE_INSN_EPILOGUE_BEG) - break; - i1 = NEXT_INSN (i1); - } + i1 = NEXT_INSN (i1); while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2)) - { - if (NOTE_P (i2) && NOTE_KIND (i2) == NOTE_INSN_EPILOGUE_BEG) - break; - i2 = NEXT_INSN (i2); - } + i2 = NEXT_INSN (i2); if ((i1 == BB_END (bb1) && !NONDEBUG_INSN_P (i1)) || (i2 == BB_END (bb2) && !NONDEBUG_INSN_P (i2))) Index: gcc/df-problems.c === --- gcc.orig/df-problems.c +++ gcc/df-problems.c @@ -3953,8 +3953,6 @@ can_move_insns_across (rtx from, rtx to, { if (CALL_P (insn)) break; - if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) - break; if (NONDEBUG_INSN_P (insn)) { if (may_trap_or_fault_p (PATTERN (insn)) Index: gcc/dwarf2out.c === --- gcc.orig/dwarf2out.c +++ gcc/dwarf2out.c @@ -2939,10 +2939,10 @@ dwarf2out_frame_debug (rtx insn, bool af dwarf2out_flush_queued_reg_saves (); } -/* Determine if we need to save and restore CFI information around this - epilogue. If SIBCALL is true, then this is a sibcall epilogue. If - we do need to save/restore, then emit the save now, and insert a - NOTE_INSN_CFA_RESTORE_STATE at the appropriate place in the stream. */ +/* Determine if we need to save and restore CFI information around + this epilogue. If we do need to save/restore, then emit the save + now, and insert a NOTE_INSN_CFA_RESTORE_STATE at the appropriate + place in the stream. */ void dwarf2out_cfi_begin_epilogue (rtx insn) @@ -2957,8 +2957,10 @@ dwarf2out_cfi_begin_epilogue (rtx insn) if (!INSN_P (i)) continue; - /* Look for both regular and sibcalls to end the block. */ - if (returnjump_p (i)) + /* Look for both regular and sibcalls to end the block. Various +optimization passes may cause us to jump to a common epilogue +tail, so we also accept simplejumps. */ + if (returnjump_p (i) || simplejump_p (i)) break; if (CALL_P (i) && SIBLING_CALL_P (i)) break;