Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-15 Thread Bernd Schmidt
On 04/13/2011 02:38 PM, Bernd Schmidt wrote:
> This still requires the i386 output_set_got which I think I can cope
> with [...]

Patch below, to be applied on top of all the others. Only lightly tested
so far beyond standard (fairly useless) regression tests, by comparing
generated assembly before/after, for -fpic -march=pentium and core2, for
i686-linux and i686-apple-darwin10 (for TARGET_MACHO).

I've not found or managed to create a testcase for making MI thunks
generate a call to output_set_got. I think it should work, but it's not
tested.


Bernd
* config/i386/i386.c (output_set_got): Don't call
dwarf2out_flush_queued_reg_saves.
(ix86_reorg): Split set_got patterns.
(i386_dwarf_handle_frame_unspec,
i386_dwarf_flush_queued_register_saves): New static functions.
(TARGET_DWARF_HANDLE_FRAME_UNSPEC,
TARGET_DWARF_FLUSH_QUEUED_REGISTER_SAVES): New macros.
* config/i386/i386.md (set_got_call, set_got_pop, set_got_add):
New patterns.
* dwarf2out.c (scan_until_barrier): Use the target's
dwarf_flush_queued_register_saves hook.
* target.def (dwarf_flush_queued_register_saves): New hook.
* doc/tm.texi: Regenerate.
Index: gcc/config/i386/i386.c
===
--- gcc.orig/config/i386/i386.c
+++ gcc/config/i386/i386.c
@@ -8953,6 +8953,8 @@ output_set_got (rtx dest, rtx label ATTR
output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
   else
{
+ /* For normal functions, this pattern is split, but we can still
+get here for thunks.  */
  output_asm_insn ("call\t%a2", xops);
 #ifdef DWARF2_UNWIND_INFO
  /* The call to next label acts as a push.  */
@@ -9010,12 +9012,6 @@ output_set_got (rtx dest, rtx label ATTR
   get_pc_thunk_name (name, REGNO (dest));
   pic_labels_used |= 1 << REGNO (dest);
 
-#ifdef DWARF2_UNWIND_INFO
-  /* Ensure all queued register saves are flushed before the
-call.  */
-  if (dwarf2out_do_frame ())
-   dwarf2out_flush_queued_reg_saves ();
-#endif
   xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
   xops[2] = gen_rtx_MEM (QImode, xops[2]);
   output_asm_insn ("call\t%X2", xops);
@@ -30458,6 +30454,56 @@ ix86_reorg (void)
  with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
   compute_bb_for_insn ();
 
+  /* Split any set_got patterns so that we interact correctly with
+ dwarf2out.  */
+  if (!TARGET_64BIT && !TARGET_VXWORKS_RTP && !TARGET_DEEP_BRANCH_PREDICTION
+  && flag_pic)
+{
+  rtx insn, next;
+  for (insn = get_insns (); insn; insn = next)
+   {
+ rtx pat, label, dest, cst, gotsym, new_insn;
+ int icode;
+
+ next = NEXT_INSN (insn);
+ if (!NONDEBUG_INSN_P (insn))
+   continue;
+
+ icode = recog_memoized (insn);
+ if (icode != CODE_FOR_set_got && icode != CODE_FOR_set_got_labelled)
+   continue;
+
+ extract_insn (insn);
+ if (icode == CODE_FOR_set_got)
+   {
+ label = gen_label_rtx ();
+ cst = const0_rtx;
+   }
+ else
+   {
+ label = recog_data.operand[1];
+ cst = const1_rtx;
+   }
+
+ dest = recog_data.operand[0];
+ pat = gen_set_got_call (label, cst);
+ new_insn = emit_insn_before (pat, insn);
+ RTX_FRAME_RELATED_P (new_insn) = 1;
+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (new_insn), 0, 1)) = 1;
+ gotsym = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
+ pat = gen_set_got_pop (dest, label);
+ new_insn = emit_insn_before (pat, insn);
+ RTX_FRAME_RELATED_P (new_insn) = 1;
+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (new_insn), 0, 1)) = 1;
+ if (!TARGET_MACHO)
+   {
+ pat = gen_set_got_add (dest, gotsym, label);
+ new_insn = emit_insn_before (pat, insn);
+   }
+ delete_insn (insn);
+   }
+}
+
   if (optimize && optimize_function_for_speed_p (cfun))
 {
   if (TARGET_PAD_SHORT_FUNCTION)
@@ -30475,6 +30521,30 @@ ix86_reorg (void)
 move_or_delete_vzeroupper ();
 }
 
+/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
+   This is called from dwarf2out.c to emit call frame instructions
+   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.  */
+static void
+i386_dwarf_handle_frame_unspec (rtx pattern ATTRIBUTE_UNUSED,
+   int index ATTRIBUTE_UNUSED)
+{
+  gcc_assert (index == UNSPEC_SET_GOT);
+}
+
+/* Handle the TARGET_DWARF_FLUSH_QUEUED_REGISTER_SAVES hook.
+   This is called from dwarf2out.c to decide whether all queued
+   register saves should be emitted before INSN.  */
+static bool
+i386_dwarf_flush_queued_register_saves (rtx insn)
+{
+  if (!TARGET_VXWORKS_RTP || !flag_pic)
+{
+  int icode = recog_memoized (insn);
+  return (

Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-13 Thread Bernd Schmidt
The final part, an updated version of the old 004-dw2cfg patch. This
does much better placement of remember/restore; in almost all cases the
code is identical to what we currently generate, modulo minor
differences around the PROLOGUE_END label. I've made it emit queued
register saves before PROLOGUE_END so that we can use the state there
for forced labels.


Bernd
Index: gcc/dwarf2out.c
===
--- gcc.orig/dwarf2out.c
+++ gcc/dwarf2out.c
@@ -465,12 +465,11 @@ static void initial_return_save (rtx);
 static HOST_WIDE_INT stack_adjust_offset (const_rtx, HOST_WIDE_INT,
  HOST_WIDE_INT);
 static void output_cfi (dw_cfi_ref, dw_fde_ref, int);
-static void output_cfi_directive (dw_cfi_ref);
+static void output_cfi_directive (FILE *, dw_cfi_ref);
 static void output_call_frame_info (int);
 static void dwarf2out_note_section_used (void);
 static bool clobbers_queued_reg_save (const_rtx);
 static void dwarf2out_frame_debug_expr (rtx);
-static void dwarf2out_cfi_begin_epilogue (rtx);
 static void dwarf2out_frame_debug_restore_state (void);
 
 /* Support for complex CFA locations.  */
@@ -823,9 +822,6 @@ new_cfi (void)
 /* The insn after which a new CFI note should be emitted.  */
 static rtx cfi_insn;
 
-/* True if remember_state should be emitted before following CFI directive.  */
-static bool emit_cfa_remember;
-
 /* True if any CFI directives were emitted at the current insn.  */
 static bool any_cfis_emitted;
 
@@ -868,28 +864,34 @@ dwarf2out_maybe_emit_cfi_label (void)
 }
 }
 
+static void
+add_cfa_remember (void)
+{
+  dw_cfi_ref cfi_remember;
+
+  /* Emit the state save.  */
+  cfi_remember = new_cfi ();
+  cfi_remember->dw_cfi_opc = DW_CFA_remember_state;
+  add_fde_cfi (cfi_remember);
+}
+
+/* Nonnull if add_fde_cfi should not just emit a NOTE_INSN_CFI, but
+   also add the CFI to this vector.  */
+static cfi_vec *cfi_insn_vec;
+
 /* Add CFI to the current fde at the PC value indicated by LABEL if specified,
or to the CIE if LABEL is NULL.  */
 
 static void
 add_fde_cfi (dw_cfi_ref cfi)
 {
-  if (emit_cfa_remember)
-{
-  dw_cfi_ref cfi_remember;
-
-  /* Emit the state save.  */
-  emit_cfa_remember = false;
-  cfi_remember = new_cfi ();
-  cfi_remember->dw_cfi_opc = DW_CFA_remember_state;
-  add_fde_cfi (cfi_remember);
-}
-
   any_cfis_emitted = true;
   if (cfi_insn != NULL)
 {
   cfi_insn = emit_note_after (NOTE_INSN_CFI, cfi_insn);
   NOTE_CFI (cfi_insn) = cfi;
+  if (cfi_insn_vec != NULL)
+   VEC_safe_push (dw_cfi_ref, gc, *cfi_insn_vec, cfi);
 }
   else
 {
@@ -980,12 +982,6 @@ static dw_cfa_location old_cfa;
from the CFA.  */
 static dw_cfa_location cfa_store;
 
-/* The current save location around an epilogue.  */
-static dw_cfa_location cfa_remember;
-
-/* Like cfa_remember, but a copy of old_cfa.  */
-static dw_cfa_location old_cfa_remember;
-
 /* The running total of the size of arguments pushed onto the stack.  */
 static HOST_WIDE_INT args_size;
 
@@ -1339,179 +1335,6 @@ stack_adjust_offset (const_rtx pattern, 
   return offset;
 }
 
-/* Precomputed args_size for CODE_LABELs and BARRIERs preceeding them,
-   indexed by INSN_UID.  */
-
-static HOST_WIDE_INT *barrier_args_size;
-
-/* Helper function for compute_barrier_args_size.  Handle one insn.  */
-
-static HOST_WIDE_INT
-compute_barrier_args_size_1 (rtx insn, HOST_WIDE_INT cur_args_size,
-VEC (rtx, heap) **next)
-{
-  HOST_WIDE_INT offset = 0;
-  int i;
-
-  if (! RTX_FRAME_RELATED_P (insn))
-{
-  if (prologue_epilogue_contains (insn))
-   /* Nothing */;
-  else if (GET_CODE (PATTERN (insn)) == SET)
-   offset = stack_adjust_offset (PATTERN (insn), cur_args_size, 0);
-  else if (GET_CODE (PATTERN (insn)) == PARALLEL
-  || GET_CODE (PATTERN (insn)) == SEQUENCE)
-   {
- /* There may be stack adjustments inside compound insns.  Search
-for them.  */
- for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
-   if (GET_CODE (XVECEXP (PATTERN (insn), 0, i)) == SET)
- offset += stack_adjust_offset (XVECEXP (PATTERN (insn), 0, i),
-cur_args_size, offset);
-   }
-}
-  else
-{
-  rtx expr = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
-
-  if (expr)
-   {
- expr = XEXP (expr, 0);
- if (GET_CODE (expr) == PARALLEL
- || GET_CODE (expr) == SEQUENCE)
-   for (i = 1; i < XVECLEN (expr, 0); i++)
- {
-   rtx elem = XVECEXP (expr, 0, i);
-
-   if (GET_CODE (elem) == SET && !RTX_FRAME_RELATED_P (elem))
- offset += stack_adjust_offset (elem, cur_args_size, offset);
- }
-   }
-}
-
-#ifndef STACK_GROWS_DOWNWARD
-  offset = -offset;
-#endif
-
-  cur_args_size += offset;
-  if (cur_args_size < 0)
-

Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-13 Thread Bernd Schmidt
The second part is a new patch, which reduces the amount of different
code paths we can take in add_fde_cfi, as this was becoming
unmanageable. The concept is to first emit just the CFI notes, in all
cases. Later, after we're done producing the CFI insns we need, another
pass over the rtl adds the necessary labels and set_loc/advance_loc
CFIs. One consequence of this is that def_cfa_1 can no longer use
lookup_cfa, so it just compares to an old_cfa variable instead. This
also requires target-specific changes as some ports use
dwarf2out_cfi_label. An (untested) example of the necessary changes is
in config/arm.


Bernd
---
 config/arm/arm.c |5 
 config/ia64/ia64.c   |6 
 config/sparc/sparc.c |7 
 config/vax/vax.c |2 
 dwarf2out.c  |  467 ---
 dwarf2out.h  |   32 +++
 final.c  |5 
 target.def   |2 
 tree.h   |   31 ---
 9 files changed, 270 insertions(+), 287 deletions(-)

Index: gcc/config/arm/arm.c
===
--- gcc.orig/config/arm/arm.c
+++ gcc/config/arm/arm.c
@@ -19977,18 +19977,19 @@ thumb_pushpop (FILE *f, unsigned long ma
 
   if (push && pushed_words && dwarf2out_do_frame ())
 {
-  char *l = dwarf2out_cfi_label (false);
   int pushed_mask = real_regs;
 
+  dwarf2out_maybe_emit_cfi_label ();
+
   *cfa_offset += pushed_words * 4;
-  dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
+  dwarf2out_def_cfa (SP_REGNUM, *cfa_offset);
 
   pushed_words = 0;
   pushed_mask = real_regs;
   for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
{
  if (pushed_mask & 1)
-   dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
+   dwarf2out_reg_save (regno, 4 * pushed_words++ - *cfa_offset);
}
 }
 }
@@ -20997,10 +20998,9 @@ thumb1_output_function_prologue (FILE *f
 the stack pointer.  */
   if (dwarf2out_do_frame ())
{
- char *l = dwarf2out_cfi_label (false);
-
+ dwarf2out_maybe_emit_cfi_label ();
  cfa_offset = cfa_offset + crtl->args.pretend_args_size;
- dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
+ dwarf2out_def_cfa (SP_REGNUM, cfa_offset);
}
 }
 
@@ -21046,10 +21046,10 @@ thumb1_output_function_prologue (FILE *f
 
   if (dwarf2out_do_frame ())
{
- char *l = dwarf2out_cfi_label (false);
+ dwarf2out_maybe_emit_cfi_label ();
 
  cfa_offset = cfa_offset + 16;
- dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
+ dwarf2out_def_cfa (SP_REGNUM, cfa_offset);
}
 
   if (l_mask)
@@ -22749,7 +22749,7 @@ arm_except_unwind_info (struct gcc_optio
stack alignment.  */
 
 static void
-arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
+arm_dwarf_handle_frame_unspec (rtx pattern, int index)
 {
   rtx unspec = SET_SRC (pattern);
   gcc_assert (GET_CODE (unspec) == UNSPEC);
@@ -22760,8 +22760,7 @@ arm_dwarf_handle_frame_unspec (const cha
   /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
  put anything on the stack, so hopefully it won't matter.
  CFA = SP will be correct after alignment.  */
-  dwarf2out_reg_save_reg (label, stack_pointer_rtx,
-  SET_DEST (pattern));
+  dwarf2out_reg_save_reg (stack_pointer_rtx, SET_DEST (pattern));
   break;
 default:
   gcc_unreachable ();
Index: gcc/config/ia64/ia64.c
===
--- gcc.orig/config/ia64/ia64.c
+++ gcc/config/ia64/ia64.c
@@ -330,7 +330,7 @@ static enum machine_mode ia64_promote_fu
 static void ia64_trampoline_init (rtx, tree, rtx);
 static void ia64_override_options_after_change (void);
 
-static void ia64_dwarf_handle_frame_unspec (const char *, rtx, int);
+static void ia64_dwarf_handle_frame_unspec (rtx, int);
 static tree ia64_builtin_decl (unsigned, bool);
 
 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
@@ -9710,9 +9710,7 @@ ia64_dwarf2out_def_steady_cfa (rtx insn,
processing.  The real CFA definition is set up above.  */
 
 static void
-ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label),
-   rtx ARG_UNUSED (pattern),
-   int index)
+ia64_dwarf_handle_frame_unspec (rtx ARG_UNUSED (pattern), int index)
 {
   gcc_assert (index == UNSPECV_ALLOC);
 }
Index: gcc/config/sparc/sparc.c
===
--- gcc.orig/config/sparc/sparc.c
+++ gcc/config/sparc/sparc.c
@@ -454,7 +454,7 @@ static unsigned int sparc_function_arg_b
 const_tree);
 static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
enum machine_mode, tree, bool);
-static void sparc_dwarf_handle_frame_unspec (const cha

Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-13 Thread Bernd Schmidt
On 04/13/2011 04:14 PM, Bernd Schmidt wrote:
> On 04/11/2011 07:10 PM, Richard Henderson wrote:
>> Ok.
> 
> Did you receive my reply to this message from earlier today? It doesn't
> seem to have made it to gcc-patches yet.

Since gcc-patches appears to have dropped the message, I'll resend it in
three parts.

There are three patches here, but they must be applied together (things
will mostly work otherwise, but I expect -freorder-blocks-and-partition
is broken in the intermediate stages). Below is the ChangeLog for the
entire set, and the first of the patches. This is just a new version of
the previously posted 002-scanfirst patch, now changed to delete the CFI
notes afterwards in order to avoid -fcompare-debug failures.


Bernd

* target.def (dwarf_handle_frame_unspec): Remove label argument.
* doc/tm.texi: Regenerate.
* tree.h (dwarf2out_cfi_label, dwarf2out_def_cfa,
dwarf2out_window_save, dwarf2out_reg_save, dwarf2out_return_save,
dwarf2out_return_reg, dwarf2out_reg_save_reg): Don't declare.
* final.c (final_start_function): Call
dwarf2out_frame_debug_after_prologue.
(final_scan_insn): Don't call dwarf2out_frame_debug for anything.
Handle NOTE_INSN_CFI and NOTE_INSN_CFI_LABEL.
(final): Delete these notes.
* insn-notes.def (CFI, CFI_LABEL): New.
* jump.c (addr_vec_p): New function.
* dwarf2out.c (cfi_insn): New static variable.
(dwarf2out_cfi_label): Remove force argument. All callers changed.
Only generate the label, don't emit it.
(dwarf2out_maybe_emit_cfi_label): New function.

(add_fde_cfi): Remove label argument.  All callers changed.  Remove
most code; leave a condition to either emit a CFI insn, or add the
CFI to the FDE CFI vector.
(add_cie_cfi): New static function.
(add_cfi): Remove function.
(old_cfa): New static variable.
(cfa_remember): Remove static variable.
(dwarf2out_def_cfa): Replace label argument with a bool for_cie
argument.  All callers changed.  Don't use lookup_cfa; use and
update the global old_cfa variable.  Call add_fde_cfi or add_cie_cfi
at the end.
(reg_save): Replace label argument with a bool.  All callers changed.
Call add_fde_cfi or add_cie_cfi at the end.
(dwarf2out_reg_save, dwarf2out_return_save, dwarf2out_return_reg,
dwarf2out_args_szie, dwarf2out_stack_adjust, dwarf2out_reg_save_reg,
dwarf2out_frame_debug_def_cfa, dwarf2out_frame_debug_cfa_offset,
dwarf2out_frame_debug_cfa_register, dwarf2out_frame_debug_cfa_restore,
dwarf2out_frame_debug_cfa_expression, dwarf2out_frame_debug_expr):
Remove label argument.  All callers changed.
(barrier_args_size): Remove variable.
(compute_barrier_args_size_1, compute_barrier_args_size): Remove
functions.
(dwarf2out_notice_stack_adjust): Don't handle barriers.
(last_reg_save_label): Remove variable.  All sets and uses removed.
(cfi_label_required_p, add_cfis_to_fde): New static functions.
(dwarf2out_frame_debug_restore_state): Simply add the new CFI.
(dwarf2out_frame_debug): Set cfi_insn, and clear it.  Don't call
dwarf2out_flush_queued_reg_saves at the top.
(dwarf2out_frame_debug_init): Initialize old_cfa.
(copy_cfi_vec_parts): New static function.
(jump_target_info): New struct type.
(dwarf2out_cfi_begin_epilogue): Remove.
(save_point_p, record_current_state, maybe_record_jump_target,
vec_is_prefix_of, append_extra_cfis, debug_cfi_vec, switch_note_p,
scan_until_barrier, find_best_starting_point): New static functions.
(dwarf2out_frame_debug_after_prologue): New function.
(dwarf2out_emit_cfi): New function.
(output_cfi_directive): New FILE argument.  All callers changed.
Avoid some paths if it is not asm_out_file; otherwise print to it.
(output_all_cfis): Remove function.
(output_cfis): Remove do_cfi_asm arg.  All callers changed.  Never
call output_cfi_directive.
(dwarf2out_frame_init): Initialize old_cfa.
(dwarf2out_switch_text_section): Don't initialize dw_fde_current_label.
Don't call output_all_cfis.
* dwarf2out.h (dwarf2out_cfi_label, dwarf2out_def_cfa,
dwarf2out_window_save, dwarf2out_reg_save, dwarf2out_return_save,
dwarf2out_return_reg, dwarf2out_reg_save_reg, dwarf2out_emit_cfi,
dwarf2out_frame_debug_after_prologue): Declare.
(dwarf2out_cfi_begin_epilogue, dwarf2out_frame_debug_restore_state):
Don't declare.
(struct dw_cfi_struct): Add forward declaration.
* rtl.h (union rtunion_def): Add rt_cfi member.
(XCFI, XCCFI, NOTE_CFI, NOTE_LABEL_NUMBER): New macros.
(addr_vec_p): Declare.
* config/sparc/sparc.c (sparc_dwarf_handle_frame_unspec): Remove
 

Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-13 Thread Jakub Jelinek
On Wed, Apr 13, 2011 at 07:44:26AM -0700, Richard Henderson wrote:
> Yeah, while I was working on dwarf line numbers recently, I found that
> just about the only thing that would produce anything interesting was
> a profiled bootstrap.

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48253#c1

is what I've been using when I touched dwarf2out recently.

Jakub


Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-13 Thread Richard Henderson
On 04/13/2011 05:38 AM, Bernd Schmidt wrote:
> This bootstraps and tests ok on i686-linux. However, there is work left
> to be done. Can I take you up on your offer to work with me on this?
> This still requires the i386 output_set_got which I think I can cope
> with, but the ia64 backend does a number of things with unwinding that I
> don't understand. Also, I'll be away the next two weeks - if you arrive
> at a complete version during that time it would be great if you could
> commit it.

Ok, I'll put this on my to-do list.

> One thing to note is that it seems surprisingly hard to make
> -freorder-blocks-and-partition do anything interesting. There's one C++
> testcase (partition2.C I think) which I used to debug this code, but
> other than that I haven't really found anything that actually generates
> two nonempty partitions.

Yeah, while I was working on dwarf line numbers recently, I found that
just about the only thing that would produce anything interesting was
a profiled bootstrap.


r~


Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-13 Thread Bernd Schmidt
On 04/11/2011 07:10 PM, Richard Henderson wrote:
> Ok.

Did you receive my reply to this message from earlier today? It doesn't
seem to have made it to gcc-patches yet.


Bernd


Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-11 Thread Richard Henderson
On 04/05/2011 02:58 PM, Bernd Schmidt wrote:
>   * dwarf2out.c (struct dw_cfi_struct): Remove member dw_cfi_next.
>   (dw_cfi_ref): Add DEF_VEC_P and some DEF_VEC_ALLOC_Ps.
>   (cfi_vec): New typedef.
>   (struct dw_fde_struct): Make dw_fde_cfi a cfi_vec. Replace
>   dw_fde_switch_cfi with an integer dw_fde_switch_cfi_index.
>   (cie_cfi_vec): New static variable.
>   (cie_cfi_head): Delete.
>   (add_cfi): Accept a cfi_vec * as first argument. All callers and
>   declaration changed. Use vector rather than list operations.
>   (new_cfi): Don't initialize the dw_cfi_next field.
>   (add_fde_cfi): Allocate cie_cfi_vec if necessary. Use vector
>   rather than list operations.
>   (lookup_cfa): Use vector rather than list operations.
>   (output_cfis): New argument upto. Accept a cfi_vec rather than
>   a dw_cfi_ref list head as argument. All callers changed.
>   Iterate over the vector using upto as a maximum index.
>   (output_all_cfis): New static function.
>   (output_fde): Use vector rather than list operations. Use the
>   new upto argument for output_cfis rather than manipulating a
>   list.
>   (dwarf2out_begin_prologue): Change initializations to match
>   new struct members.
>   (dwarf2out_switch_text_section): Initialize dw_fde_switch_cfi_index
>   from the vector length rather than searching for the end of a list.
>   Use output_all_cfis.
>   (convert_cfa_to_fb_loc_list): Use vector rather than list operations.

Ok.


r~


Re: [PATCH 3/6] Allow jumps in epilogues

2011-04-05 Thread Bernd Schmidt
On 03/31/2011 11:28 PM, Richard Henderson wrote:

>> 003 - Store dw_cfi_refs in VECs rather than linked lists. Looks
>>   larger than it is due to reindentation
> 
> Like 001, this one looks like it's totally independent of and of
> the other changes, and a good cleanup.  Please go ahead and test
> and commit this one independently.

Here's a new version - the code had changed underneath me in the
meantime, and I had some off-by-one errors involving the switch_index.
Bootstrapped and tested on i686-linux; I've also built my set of
testcases with -freorder-blocks-and-partition without code generation
changes from before to after. Ok?


Bernd
* dwarf2out.c (struct dw_cfi_struct): Remove member dw_cfi_next.
(dw_cfi_ref): Add DEF_VEC_P and some DEF_VEC_ALLOC_Ps.
(cfi_vec): New typedef.
(struct dw_fde_struct): Make dw_fde_cfi a cfi_vec. Replace
dw_fde_switch_cfi with an integer dw_fde_switch_cfi_index.
(cie_cfi_vec): New static variable.
(cie_cfi_head): Delete.
(add_cfi): Accept a cfi_vec * as first argument. All callers and
declaration changed. Use vector rather than list operations.
(new_cfi): Don't initialize the dw_cfi_next field.
(add_fde_cfi): Allocate cie_cfi_vec if necessary. Use vector
rather than list operations.
(lookup_cfa): Use vector rather than list operations.
(output_cfis): New argument upto. Accept a cfi_vec rather than
a dw_cfi_ref list head as argument. All callers changed.
Iterate over the vector using upto as a maximum index.
(output_all_cfis): New static function.
(output_fde): Use vector rather than list operations. Use the
new upto argument for output_cfis rather than manipulating a
list.
(dwarf2out_begin_prologue): Change initializations to match
new struct members.
(dwarf2out_switch_text_section): Initialize dw_fde_switch_cfi_index
from the vector length rather than searching for the end of a list.
Use output_all_cfis.
(convert_cfa_to_fb_loc_list): Use vector rather than list operations.

Index: dwarf2out.c
===
--- dwarf2out.c (revision 171839)
+++ dwarf2out.c (working copy)
@@ -267,7 +267,6 @@ typedef union GTY(()) dw_cfi_oprnd_struc
 dw_cfi_oprnd;
 
 typedef struct GTY(()) dw_cfi_struct {
-  dw_cfi_ref dw_cfi_next;
   enum dwarf_call_frame_info dw_cfi_opc;
   dw_cfi_oprnd GTY ((desc ("dw_cfi_oprnd1_desc (%1.dw_cfi_opc)")))
 dw_cfi_oprnd1;
@@ -276,6 +275,12 @@ typedef struct GTY(()) dw_cfi_struct {
 }
 dw_cfi_node;
 
+DEF_VEC_P (dw_cfi_ref);
+DEF_VEC_ALLOC_P (dw_cfi_ref, heap);
+DEF_VEC_ALLOC_P (dw_cfi_ref, gc);
+
+typedef VEC(dw_cfi_ref, gc) *cfi_vec;
+
 /* This is how we define the location of the CFA. We use to handle it
as REG + OFFSET all the time,  but now it can be more complex.
It can now be either REG + CFA_OFFSET or *(REG + BASE_OFFSET) + CFA_OFFSET.
@@ -304,8 +309,8 @@ typedef struct GTY(()) dw_fde_struct {
   const char *dw_fde_vms_begin_epilogue;
   const char *dw_fde_second_begin;
   const char *dw_fde_second_end;
-  dw_cfi_ref dw_fde_cfi;
-  dw_cfi_ref dw_fde_switch_cfi; /* Last CFI before switching sections.  */
+  cfi_vec dw_fde_cfi;
+  int dw_fde_switch_cfi_index; /* Last CFI before switching sections.  */
   HOST_WIDE_INT stack_realignment;
   unsigned funcdef_number;
   /* Dynamic realign argument pointer register.  */
@@ -410,8 +415,8 @@ current_fde (void)
   return fde_table_in_use ? &fde_table[fde_table_in_use - 1] : NULL;
 }
 
-/* A list of call frame insns for the CIE.  */
-static GTY(()) dw_cfi_ref cie_cfi_head;
+/* A vector of call frame insns for the CIE.  */
+static GTY(()) cfi_vec cie_cfi_vec;
 
 /* Some DWARF extensions (e.g., MIPS/SGI) implement a subprogram
attribute that accelerates the lookup of the FDE associated
@@ -451,7 +456,7 @@ static GTY(()) section *cold_text_sectio
 static char *stripattributes (const char *);
 static const char *dwarf_cfi_name (unsigned);
 static dw_cfi_ref new_cfi (void);
-static void add_cfi (dw_cfi_ref *, dw_cfi_ref);
+static void add_cfi (cfi_vec *, dw_cfi_ref);
 static void add_fde_cfi (const char *, dw_cfi_ref);
 static void lookup_cfa_1 (dw_cfi_ref, dw_cfa_location *, dw_cfa_location *);
 static void lookup_cfa (dw_cfa_location *);
@@ -807,7 +812,6 @@ new_cfi (void)
 {
   dw_cfi_ref cfi = ggc_alloc_dw_cfi_node ();
 
-  cfi->dw_cfi_next = NULL;
   cfi->dw_cfi_oprnd1.dw_cfi_reg_num = 0;
   cfi->dw_cfi_oprnd2.dw_cfi_reg_num = 0;
 
@@ -817,9 +821,8 @@ new_cfi (void)
 /* Add a Call Frame Instruction to list of instructions.  */
 
 static inline void
-add_cfi (dw_cfi_ref *list_head, dw_cfi_ref cfi)
+add_cfi (cfi_vec *vec, dw_cfi_ref cfi)
 {
-  dw_cfi_ref *p;
   dw_fde_ref fde = current_fde ();
 
   /* When DRAP is used, CFA is defined with an expression.  Redefine
@@ -841,11 +844,7 @@ add_cfi (dw_cfi_ref *list_head, dw_cfi_r
  

Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-31 Thread Richard Henderson
On 03/31/2011 03:07 PM, Bernd Schmidt wrote:
> No, it's used - but it looks like I forgot to quilt refresh and the
> final.c changes weren't included. New patch below. After this patch, the
> whole function is processed before final, and rather than emitting cfi
> directives immediately, we create these notes which cause the directives
> to be emitted during final.

Ah, much better.  I had wondered what I was missing.

> This probably shouldn't be committed separately when these changes go
> in, as (I think) it breaks -freorder-blocks-and-partition as well as the
> code in i386.c; it's split out simply to show an intermediate stage.

Sure.

> Yes, this falls under "inefficient CFI insns". I wanted to post a
> preliminary proof-of-concept patch set now which generates
> correct(-looking) output, but not necessarily optimized output. Not
> quite sure yet how to tackle this but I'll think of something.

Ok, I'll go ahead and apply all the patches locally and see what the
output actually looks like.  Perhaps I'll have more suggestions.


r~


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-31 Thread Bernd Schmidt
On 03/31/2011 11:28 PM, Richard Henderson wrote:
>> Rather than use a CFG, I've tried to do something similar to
>> compute_barrier_args_size, using JUMP_LABELs etc.
> 
> A reasonable solution for now, I suppose.

Ok, sounds like you haven't discovered a fatal flaw; I'll keep hacking
on it.

>> Summary of the patches:
>> 001 - just create a dwarf2out_frame_debug_init function.
> 
> Ok.
> 
>> 003 - Store dw_cfi_refs in VECs rather than linked lists. Looks
>>   larger than it is due to reindentation
> 
> Like 001, this one looks like it's totally independent of and of
> the other changes, and a good cleanup.  Please go ahead and test
> and commit this one independently.

Will do.

>> 002 - Make it walk the function in a first pass and record CFIs to
>>   be output later
>
> Do I correctly understand that NOTE_INSN_CFI isn't actually being
> used in this patch?

No, it's used - but it looks like I forgot to quilt refresh and the
final.c changes weren't included. New patch below. After this patch, the
whole function is processed before final, and rather than emitting cfi
directives immediately, we create these notes which cause the directives
to be emitted during final.

This probably shouldn't be committed separately when these changes go
in, as (I think) it breaks -freorder-blocks-and-partition as well as the
code in i386.c; it's split out simply to show an intermediate stage.

>> 004 - Change the function walk introduced in 002 so that it records
>>   and restores state when reaching jumps/barriers
> 
> I'm not too fond of vec_is_prefix_of.  The Problem is that you're
> not applying any understanding of the actual data, just doing a
> string comparison (effectively).

Yes, this falls under "inefficient CFI insns". I wanted to post a
preliminary proof-of-concept patch set now which generates
correct(-looking) output, but not necessarily optimized output. Not
quite sure yet how to tackle this but I'll think of something.


Bernd
* cfgcleanup.c (flow_find_head_matching_sequence): Ignore
epilogue notes.
* df-problems.c (can_move_insns_across): Don't stop at epilogue
notes.
* dwarf2out.c (dwarf2out_cfi_begin_epilogue): Also allow a
simplejump to end the block.

Index: gcc/dwarf2out.c
===
--- gcc.orig/dwarf2out.c
+++ gcc/dwarf2out.c
@@ -471,6 +471,8 @@ static void output_call_frame_info (int)
 static void dwarf2out_note_section_used (void);
 static bool clobbers_queued_reg_save (const_rtx);
 static void dwarf2out_frame_debug_expr (rtx, const char *);
+static void dwarf2out_cfi_begin_epilogue (rtx);
+static void dwarf2out_frame_debug_restore_state (void);
 
 /* Support for complex CFA locations.  */
 static void output_cfa_loc (dw_cfi_ref, int);
@@ -879,6 +881,9 @@ dwarf2out_cfi_label (bool force)
   return label;
 }
 
+/* The insn after which a new CFI note should be emitted.  */
+static rtx cfi_insn;
+
 /* True if remember_state should be emitted before following CFI directive.  */
 static bool emit_cfa_remember;
 
@@ -961,7 +966,8 @@ add_fde_cfi (const char *label, dw_cfi_r
}
}
 
- output_cfi_directive (cfi);
+ cfi_insn = emit_note_after (NOTE_INSN_CFI, cfi_insn);
+ NOTE_CFI (cfi_insn) = cfi;
 
  list_head = &fde->dw_fde_cfi;
  any_cfis_emitted = true;
@@ -2790,6 +2796,11 @@ dwarf2out_frame_debug (rtx insn, bool af
   rtx note, n;
   bool handled_one = false;
 
+  if (after_p)
+cfi_insn = insn;
+  else
+cfi_insn = PREV_INSN (insn);
+
   if (!NONJUMP_INSN_P (insn) || clobbers_queued_reg_save (insn))
 dwarf2out_flush_queued_reg_saves ();
 
@@ -2911,6 +2922,7 @@ void
 dwarf2out_frame_debug_init (void)
 {
   size_t i;
+  rtx insn;
 
   /* Flush any queued register saves.  */
   dwarf2out_flush_queued_reg_saves ();
@@ -2937,12 +2949,64 @@ dwarf2out_frame_debug_init (void)
   XDELETEVEC (barrier_args_size);
   barrier_args_size = NULL;
 }
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+{
+  rtx pat;
+  if (BARRIER_P (insn))
+   {
+ dwarf2out_frame_debug (insn, false);
+ continue;
+   }
+  else if (NOTE_P (insn))
+   {
+ switch (NOTE_KIND (insn))
+   {
+   case NOTE_INSN_EPILOGUE_BEG:
+#if defined (HAVE_epilogue)
+ dwarf2out_cfi_begin_epilogue (insn);
+#endif
+ break;
+   case NOTE_INSN_CFA_RESTORE_STATE:
+ cfi_insn = insn;
+ dwarf2out_frame_debug_restore_state ();
+ break;
+   }
+ continue;
+   }
+  if (!NONDEBUG_INSN_P (insn))
+   continue;
+  pat = PATTERN (insn);
+  if (asm_noperands (pat) >= 0)
+   continue;
+  if (GET_CODE (pat) == SEQUENCE)
+   {
+ int j;
+ for (j = 1; j < XVECLEN (pat, 0); j++)
+   dwarf2out_frame_debug (XVECEXP (pat, 0, j), false);
+ insn = XVECEXP (pat, 0

Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-31 Thread Richard Henderson
On 03/31/2011 12:59 PM, Bernd Schmidt wrote:
>> So long as late late compilation passes continue to not move frame-related
>> insns across basic block boundaries, we should be fine.
> 
> I'm nervous about this as the reorg pass can do arbitrary
> transformations. On Blackfin for example, we can reorder basic blocks
> for the sake of loop optimizations; sched-ebb can create new blocks,
> etc. I think it would be best if we can somehow make it work during
> final, without a CFG.

I guess that's the best thing for now.  I'm sure we all agree that long
term all transformations should preserve the CFG all the way through final.
At which point this could be implemented as a pass on the function after
all transformations are complete.

> Rather than use a CFG, I've tried to do something similar to
> compute_barrier_args_size, using JUMP_LABELs etc.

A reasonable solution for now, I suppose.

> 
> Summary of the patches:
> 001 - just create a dwarf2out_frame_debug_init function.

Ok.

> 002 - Make it walk the function in a first pass and record CFIs to
>   be output later

Do I correctly understand that NOTE_INSN_CFI isn't actually being
used in this patch?

> 003 - Store dw_cfi_refs in VECs rather than linked lists. Looks
>   larger than it is due to reindentation

Like 001, this one looks like it's totally independent of and of
the other changes, and a good cleanup.  Please go ahead and test
and commit this one independently.

> 004 - Change the function walk introduced in 002 so that it records
>   and restores state when reaching jumps/barriers

I'm not too fond of vec_is_prefix_of.  The Problem is that you're
not applying any understanding of the actual data, just doing a
string comparison (effectively).

Imagine two code paths A and B that both save R2 and R3 into their
respective stack slots.  Imagine that -- for whatever reason -- the
stores have been scheduled differently such that on path A R2 is 
saved before R3, and the reverse on path B.

Your prefix test will conclude that paths A and B end with different
unwind info, even though they are in fact compatible.

Using some mechanism by which we can compare aggregate CFI information
on a per-register basis ought to also vastly improve the efficiency in
adjusting the cfi info between code points.  It should also enable
proper information in the -freorder-blocks-and-partition case.

> * i386.c uses dwarf2out_frame_debug directly in some cases and is
>   unconverted

Hum.  I wonder what the best way to attack this.  It's a local change,
adjusting and then restoring the unwind state between two insns that
should not be scheduled separately.

We could turn them into two unspec_volatiles, and lose scheduling 
across this pattern.  But ideally this is a value that ought to be
shrink-wrapped.  It's expensive to compute, and there are many
early-return situations in which we don't need it.

I suppose we could split this pattern manually in i386 reorg; 
forcing this to be split before final even at -O0.  At that point
all shrink-wrapping would be done and an unspecv replacement 
would be ok.

> * I haven't tested whether my attempt to use
>   get_eh_landing_pad_from_rtx in the absence of a CFG actually works

It will.  This information is stored in cfun->eh.  By design this
information must survive until final, so that we can emit the 
actual eh info into the appropriate tables.

> * Computed jumps and nonlocal gotos aren't handled. I think this
>   could be done by recording the state at NOTE_INSN_PROLOGUE_END
>   and using that for all labels we can't otherwise reach.

That should be reasonable.  You could assert that all of these 
labels are in forced_labels.  All computed branch targets should
be listed therein.


r~


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-31 Thread Bernd Schmidt
On 03/26/2011 04:26 AM, Richard Henderson wrote:
> I think the ideal thing would be a pass while the cfg is still extant that
> captures the unwind info into notes; these can be recorded at basic block
> boundaries, so that they persist until the end of compilation.
> 
> So long as late late compilation passes continue to not move frame-related
> insns across basic block boundaries, we should be fine.

I'm nervous about this as the reorg pass can do arbitrary
transformations. On Blackfin for example, we can reorder basic blocks
for the sake of loop optimizations; sched-ebb can create new blocks,
etc. I think it would be best if we can somehow make it work during
final, without a CFG.

> I'm willing to work with you on the problem of cfg-aware unwind info.  We
> have needed this for a really long time; there are existing bugs related 
> to exception handling and !ACCUMULATE_OUTGOING_ARGS that would be fixed by
> this.

I'm appending a series of draft patches. It tries to compute CFIs for
stretches of straight-line code and records state for potential jump
targets, iterating until everything is covered. Then, a pass through all
insns from start to finish reassembles the pieces into a coherent string
of CFI insns.

Rather than use a CFG, I've tried to do something similar to
compute_barrier_args_size, using JUMP_LABELs etc.

Summary of the patches:
001 - just create a dwarf2out_frame_debug_init function.
002 - Make it walk the function in a first pass and record CFIs to
  be output later
003 - Store dw_cfi_refs in VECs rather than linked lists. Looks
  larger than it is due to reindentation
004 - Change the function walk introduced in 002 so that it records
  and restores state when reaching jumps/barriers

For now I'd just like some input on whether this looks remotely viable.
There are a number of known problems with it:
* The generated CFIs are inefficient (poor use of remember/restore)
* -freorder-blocks-and-partition is broken
* i386.c uses dwarf2out_frame_debug directly in some cases and is
  unconverted
* I haven't tested whether my attempt to use
  get_eh_landing_pad_from_rtx in the absence of a CFG actually works
* Computed jumps and nonlocal gotos aren't handled. I think this
  could be done by recording the state at NOTE_INSN_PROLOGUE_END
  and using that for all labels we can't otherwise reach.


Bernd
Index: gcc/dwarf2out.c
===
--- gcc.orig/dwarf2out.c
+++ gcc/dwarf2out.c
@@ -2790,38 +2790,6 @@ dwarf2out_frame_debug (rtx insn, bool af
   rtx note, n;
   bool handled_one = false;
 
-  if (insn == NULL_RTX)
-{
-  size_t i;
-
-  /* Flush any queued register saves.  */
-  dwarf2out_flush_queued_reg_saves ();
-
-  /* Set up state for generating call frame debug info.  */
-  lookup_cfa (&cfa);
-  gcc_assert (cfa.reg
- == (unsigned long)DWARF_FRAME_REGNUM (STACK_POINTER_REGNUM));
-
-  cfa.reg = STACK_POINTER_REGNUM;
-  cfa_store = cfa;
-  cfa_temp.reg = -1;
-  cfa_temp.offset = 0;
-
-  for (i = 0; i < num_regs_saved_in_regs; i++)
-   {
- regs_saved_in_regs[i].orig_reg = NULL_RTX;
- regs_saved_in_regs[i].saved_in_reg = NULL_RTX;
-   }
-  num_regs_saved_in_regs = 0;
-
-  if (barrier_args_size)
-   {
- XDELETEVEC (barrier_args_size);
- barrier_args_size = NULL;
-   }
-  return;
-}
-
   if (!NONJUMP_INSN_P (insn) || clobbers_queued_reg_save (insn))
 dwarf2out_flush_queued_reg_saves ();
 
@@ -2939,6 +2907,38 @@ dwarf2out_frame_debug (rtx insn, bool af
 dwarf2out_flush_queued_reg_saves ();
 }
 
+void
+dwarf2out_frame_debug_init (void)
+{
+  size_t i;
+
+  /* Flush any queued register saves.  */
+  dwarf2out_flush_queued_reg_saves ();
+
+  /* Set up state for generating call frame debug info.  */
+  lookup_cfa (&cfa);
+  gcc_assert (cfa.reg
+ == (unsigned long)DWARF_FRAME_REGNUM (STACK_POINTER_REGNUM));
+
+  cfa.reg = STACK_POINTER_REGNUM;
+  cfa_store = cfa;
+  cfa_temp.reg = -1;
+  cfa_temp.offset = 0;
+
+  for (i = 0; i < num_regs_saved_in_regs; i++)
+{
+  regs_saved_in_regs[i].orig_reg = NULL_RTX;
+  regs_saved_in_regs[i].saved_in_reg = NULL_RTX;
+}
+  num_regs_saved_in_regs = 0;
+
+  if (barrier_args_size)
+{
+  XDELETEVEC (barrier_args_size);
+  barrier_args_size = NULL;
+}
+}
+
 /* Determine if we need to save and restore CFI information around this
epilogue.  If SIBCALL is true, then this is a sibcall epilogue.  If
we do need to save/restore, then emit the save now, and insert a
Index: gcc/dwarf2out.h
===
--- gcc.orig/dwarf2out.h
+++ gcc/dwarf2out.h
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  
 
 extern void dwarf2out_decl (tree);
 extern void dwarf2out_frame_debug (rtx, bool);
+extern void dwarf2out_frame_debug_init (void);
 extern void dwarf2out_cfi_begin_epilogue 

Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-25 Thread Richard Henderson
On 03/25/2011 10:34 AM, Bernd Schmidt wrote:
> I don't know much about the unwinding code. I'm currently thinking about
> writing out a cfi_remember_state at the start of the function, restoring
> that clean state when necessary at the start of a new block and emitting
> the necessary directives to reach the correct state. What directives
> should I expect to be required? Can I get by just with cfi_offset and
> cfi_def_cfa_offset, or will something else be necessary?

Yes, several things: register, expression, gnu_args_size, perhaps a few more.

I think the ideal thing would be a pass while the cfg is still extant that
captures the unwind info into notes; these can be recorded at basic block
boundaries, so that they persist until the end of compilation.

So long as late late compilation passes continue to not move frame-related
insns across basic block boundaries, we should be fine.

Irritatingly, the exact place to locate this pass is difficult to pin down.
Immediately before md_reorg is the last place we have the cfg.  But we do
strange things in, e.g. ia64 where we rebuild the cfg and run sched_ebb
during md_reorg.

Of course, ia64 is a bad example because its unwind info is target-specific,
and quite a lot of the possible benefit of shrink wrapping is lost via the
register windowing.

I'm willing to work with you on the problem of cfg-aware unwind info.  We
have needed this for a really long time; there are existing bugs related 
to exception handling and !ACCUMULATE_OUTGOING_ARGS that would be fixed by
this.


r~


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-25 Thread Bernd Schmidt
On 03/23/2011 06:19 PM, Richard Henderson wrote:
> In general, with shrink-wrapping, we can have essentially arbitrary
> differences in unwind info between blocks that are sequential.

So, while that isn't the case just yet with the current shrink-wrapping
patch, it seems I will either have to make dwarf2out fully general, or
ensure that basic blocks occur in only a certain order (the prologue
must be written out only after all basic blocks that can be executed
before or without reaching it).

I don't know much about the unwinding code. I'm currently thinking about
writing out a cfi_remember_state at the start of the function, restoring
that clean state when necessary at the start of a new block and emitting
the necessary directives to reach the correct state. What directives
should I expect to be required? Can I get by just with cfi_offset and
cfi_def_cfa_offset, or will something else be necessary?


Bernd


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-24 Thread Bernd Schmidt
On 03/23/2011 06:27 PM, Richard Henderson wrote:
> On 03/23/2011 10:22 AM, Bernd Schmidt wrote:
>> On 03/23/2011 06:19 PM, Richard Henderson wrote:
>>> body
>>> body
>>> restore r1  XXX
>>> restore r2  XXX
>>> jmp L2  XXX
>>>
>>> L1: bodyYYY
>>> bodyYYY
>>> restore r2
>>>
>>> L2: restore r3
>>> return
>>
>>> In general, with shrink-wrapping, we can have essentially arbitrary
>>> differences in unwind info between blocks that are sequential.
>>
>> I don't think this can actually happen with the current implementation.
>> There is only one prologue, and all epilogues (the normal one and the
>> sibcall epilogues) match it exactly. I don't believe we can generate
>> code as in the example above, both before and after my patch.
> 
> Um.. then what's this "allow jumps in epilogues" thing of which you speak?
> If there's a jump, then it goes somewhere, and branches over something.
> I see no constraints on what that something might be.
> 
> Could you give an example of a transformation that is allowed by this?

The idea was to be able to share a single return instruction between
epilogue/non-epilogue return paths, so that e.g. on i686 a conditional
return could be implemented as a conditional jump to a common return
insn. The allow-jumps patch then becomes necessary because bbro can move
the blocks around.

It does seem, however, that bbro can in fact cause problems for the
unwind information when the prologue is no longer in the first block.
Let me try to come up with a solution for that.


Bernd


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-23 Thread Richard Henderson
On 03/23/2011 10:22 AM, Bernd Schmidt wrote:
> On 03/23/2011 06:19 PM, Richard Henderson wrote:
>>  body
>>  body
>>  restore r1  XXX
>>  restore r2  XXX
>>  jmp L2  XXX
>>
>> L1:  bodyYYY
>>  bodyYYY
>>  restore r2
>>
>> L2:  restore r3
>>  return
> 
>> In general, with shrink-wrapping, we can have essentially arbitrary
>> differences in unwind info between blocks that are sequential.
> 
> I don't think this can actually happen with the current implementation.
> There is only one prologue, and all epilogues (the normal one and the
> sibcall epilogues) match it exactly. I don't believe we can generate
> code as in the example above, both before and after my patch.

Um.. then what's this "allow jumps in epilogues" thing of which you speak?
If there's a jump, then it goes somewhere, and branches over something.
I see no constraints on what that something might be.

Could you give an example of a transformation that is allowed by this?



r~


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-23 Thread Bernd Schmidt
On 03/23/2011 06:19 PM, Richard Henderson wrote:
>   body
>   body
>   restore r1  XXX
>   restore r2  XXX
>   jmp L2  XXX
> 
> L1:   bodyYYY
>   bodyYYY
>   restore r2
> 
> L2:   restore r3
>   return

> In general, with shrink-wrapping, we can have essentially arbitrary
> differences in unwind info between blocks that are sequential.

I don't think this can actually happen with the current implementation.
There is only one prologue, and all epilogues (the normal one and the
sibcall epilogues) match it exactly. I don't believe we can generate
code as in the example above, both before and after my patch.


Bernd


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-23 Thread Richard Henderson
On 03/23/2011 09:48 AM, Bernd Schmidt wrote:
>> With no more code than this, I cannot believe you're generating correct
>> unwind info anymore.
> 
> Why not? Are you worried about the code at the destination of the jump?
> That should be preceded by another block falling through into it which
> also has a NOTE_INSN_EPILOGUE_BEG.
> 
> If that isn't the problem you have in mind, what is and how can we test
> for it?

body
body
restore r1  XXX
restore r2  XXX
jmp L2  XXX

L1: bodyYYY
bodyYYY
restore r2

L2: restore r3
return

Assume for the moment that "restore" on this target is something that
can't be delayed or repeated.  E.g. a pop, rather than a move which
leaves the saved value in memory at a unknown offset from the CFA.

This means we have to emit unwind directives immediately after the 
restore insn and cannot delay the epilogue unwind until we deallocate
the entire stack frame.

This means that your patch either gets the unwind info wrong for
the XXX sequence or the YYY sequence.

Correct unwind info would look like

body
body
.cfi_remember_state
restore r1
.cfi_restore r1
restore r2
.cfi_restore r2
jmp L2
.cfi_restore_state

L1: body
body
restore r2
.cfi_restore r2

L2: // validate the unwind info across the CFG making sure that the incoming
// edges contain the same unwind info here.
restore r3
.cfi_restore r3
return

In general, with shrink-wrapping, we can have essentially arbitrary
differences in unwind info between blocks that are sequential.  We have
to be prepared to fully adjust the unwind state between blocks.

Assume a { x } is the set of registers saved into the stack frame in a
given block.  We have both incoming and outgoing sets.

foo:// in: { }
cmp r1,r2
jne L1  // out: { }

L0: // in: { }
save r8
save r9
body
... // out: { r8, r9 }

L2: // in: { r8, r9, r10 }
body
body
... // out: { r8, r9, r10 }

L1: // in: { }
save r8
save r9
save r10
body
... // out: { r8, r9, r10 }

L3: // in: { r8, r9, r10 }
restore r10 // out: { r8, r9 }

L4: // in: { r8, r9 }
restore r9
restore r8
return

This layout requires more than just .cfi_remember_state/restore_state
between blocks.  We have to be prepared to emit full unwind info at
any point.  Assume cfi info marked with XXX exists between basic blocks
to fixup the transition points:

L0: save r8
save r9
.cfi_offset r8,x
.cfi_offset r9,y
body

.cfi_offset r10,z   XXX

L2: body
body

.cfi_restore r8 XXX
.cfi_restore r9 XXX
.cfi_restore r10XXX

L1: save r8
save r9
save r10
.cfi_offset r8,x
.cfi_offset r9,y
.cfi_offset r10,z
body

If this isn't clear, please ask questions.  The problem of unwinding is
way more complicated than what you appear to be assuming.


r~


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-23 Thread Bernd Schmidt
On 03/23/2011 05:46 PM, Richard Henderson wrote:
> On 03/23/2011 07:50 AM, Bernd Schmidt wrote:
>> dwarf2out has code that starts scanning from NOTE_INSN_EPILOGUE_BEG
>> until it finds the return jump. When there is common code in several
>> blocks ending in a return, we might want to share this, and in that case
>> it would be possible to encounter a simplejump rather than a returnjump.
>> This should be safe, and the following patch allows it.
> 
> With no more code than this, I cannot believe you're generating correct
> unwind info anymore.

Why not? Are you worried about the code at the destination of the jump?
That should be preceded by another block falling through into it which
also has a NOTE_INSN_EPILOGUE_BEG.

If that isn't the problem you have in mind, what is and how can we test
for it?


Bernd


Re: [PATCH 3/6] Allow jumps in epilogues

2011-03-23 Thread Richard Henderson
On 03/23/2011 07:50 AM, Bernd Schmidt wrote:
> dwarf2out has code that starts scanning from NOTE_INSN_EPILOGUE_BEG
> until it finds the return jump. When there is common code in several
> blocks ending in a return, we might want to share this, and in that case
> it would be possible to encounter a simplejump rather than a returnjump.
> This should be safe, and the following patch allows it.

With no more code than this, I cannot believe you're generating correct
unwind info anymore.

It would be possible to handle code merging including epilogue blocks
if (and IMO only if) you track unwind state on a per-block basis, and
propagate this information around the CFG, finally linearizing this
when blocks are re-ordered for the last time before final.

At present, sadly, we assume steady-state for the unwind info except
before PROLOGUE_END and after EPILOGUE_BEG.


r~


[PATCH 3/6] Allow jumps in epilogues

2011-03-23 Thread Bernd Schmidt
dwarf2out has code that starts scanning from NOTE_INSN_EPILOGUE_BEG
until it finds the return jump. When there is common code in several
blocks ending in a return, we might want to share this, and in that case
it would be possible to encounter a simplejump rather than a returnjump.
This should be safe, and the following patch allows it.


Bernd

* cfgcleanup.c (flow_find_head_matching_sequence): Ignore
epilogue notes.
* df-problems.c (can_move_insns_across): Don't stop at epilogue
notes.
* dwarf2out.c (dwarf2out_cfi_begin_epilogue): Also allow a
simplejump to end the block.

Index: gcc/cfgcleanup.c
===
--- gcc.orig/cfgcleanup.c
+++ gcc/cfgcleanup.c
@@ -1184,20 +1184,12 @@ flow_find_head_matching_sequence (basic_
 
   while (true)
 {
-  /* Ignore notes, except NOTE_INSN_EPILOGUE_BEG.  */
+  /* Ignore notes.  */
   while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
-   {
- if (NOTE_P (i1) && NOTE_KIND (i1) == NOTE_INSN_EPILOGUE_BEG)
-   break;
- i1 = NEXT_INSN (i1);
-   }
+   i1 = NEXT_INSN (i1);
 
   while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
-   {
- if (NOTE_P (i2) && NOTE_KIND (i2) == NOTE_INSN_EPILOGUE_BEG)
-   break;
- i2 = NEXT_INSN (i2);
-   }
+   i2 = NEXT_INSN (i2);
 
   if ((i1 == BB_END (bb1) && !NONDEBUG_INSN_P (i1))
  || (i2 == BB_END (bb2) && !NONDEBUG_INSN_P (i2)))
Index: gcc/df-problems.c
===
--- gcc.orig/df-problems.c
+++ gcc/df-problems.c
@@ -3953,8 +3953,6 @@ can_move_insns_across (rtx from, rtx to,
 {
   if (CALL_P (insn))
break;
-  if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
-   break;
   if (NONDEBUG_INSN_P (insn))
{
  if (may_trap_or_fault_p (PATTERN (insn))
Index: gcc/dwarf2out.c
===
--- gcc.orig/dwarf2out.c
+++ gcc/dwarf2out.c
@@ -2939,10 +2939,10 @@ dwarf2out_frame_debug (rtx insn, bool af
 dwarf2out_flush_queued_reg_saves ();
 }
 
-/* Determine if we need to save and restore CFI information around this
-   epilogue.  If SIBCALL is true, then this is a sibcall epilogue.  If
-   we do need to save/restore, then emit the save now, and insert a
-   NOTE_INSN_CFA_RESTORE_STATE at the appropriate place in the stream.  */
+/* Determine if we need to save and restore CFI information around
+   this epilogue.  If we do need to save/restore, then emit the save
+   now, and insert a NOTE_INSN_CFA_RESTORE_STATE at the appropriate
+   place in the stream.  */
 
 void
 dwarf2out_cfi_begin_epilogue (rtx insn)
@@ -2957,8 +2957,10 @@ dwarf2out_cfi_begin_epilogue (rtx insn)
   if (!INSN_P (i))
continue;
 
-  /* Look for both regular and sibcalls to end the block.  */
-  if (returnjump_p (i))
+  /* Look for both regular and sibcalls to end the block.  Various
+optimization passes may cause us to jump to a common epilogue
+tail, so we also accept simplejumps.  */
+  if (returnjump_p (i) || simplejump_p (i))
break;
   if (CALL_P (i) && SIBLING_CALL_P (i))
break;