RE: [RFA] Compact EH Patch

Matthew Fortune Wed, 28 Oct 2015 09:34:29 -0700

> This patch implements a more compact format for exception handling data.  
> Although I don't
> have recent numbers for the amount of compression achieved, an earlier 
> measurement showed
> a 30% reduction in the size of EH data for libstdc++.
> 
> A design document detailing the new format is available
> (https://github.com/MentorEmbedded/cxx-abi/blob/master/MIPSCompactEH.pdf).
> 
> This implementation enables the new format for MIPS targets only, but the 
> generic pieces
> to enable the new format for other architectures is in place.
> I couldn't really think of a good way to split up the patch to make the 
> review easier, but
> I am open to suggestions.
> I've successfully bootstrapped with the patch and completed testing across 
> many of the
> MIPS targets and multilibs.
> I also ran a test series for the x86 without regressions.
> Thanks,
> Catherine


Hi Catherine,

I'd be lying if I said I understand all of this but I follow the bulk of what 
is needed
from the MIPS side of things. This looks fine to me. There is some code which 
may not
be MIPS specific in here, a request to change the magic constants to macros and 
comments
in the key function that actually generates compact EH entries 
(mips_cfi_endproc).

As this seems quite well separated from non-compact-eh cases then I am more 
than happy
to deal with any issues or refinement incrementally. I'd like to get such a big 
patch
committed sooner rather than later.

Comments inline.

Thanks,
Matthew

>Index: libgcc/config/mips/mips-unwind.h
>===================================================================
>--- libgcc/config/mips/mips-unwind.h   (revision 0)
>+++ libgcc/config/mips/mips-unwind.h   (revision 0)
>@@ -0,0 +1,182 @@
>+/* Compact EH unwinding support for MIPS.
>+   Copyright (C) 2012-2015 Free Software Foundation, Inc.
>+
>+This file is part of GCC.
>+
>+GCC is free software; you can redistribute it and/or modify
>+it under the terms of the GNU General Public License as published by
>+the Free Software Foundation; either version 3, or (at your option)
>+any later version.
>+
>+GCC is distributed in the hope that it will be useful,
>+but WITHOUT ANY WARRANTY; without even the implied warranty of
>+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>+GNU General Public License for more details.
>+
>+Under Section 7 of GPL version 3, you are granted additional
>+permissions described in the GCC Runtime Library Exception, version
>+3.1, as published by the Free Software Foundation.
>+
>+You should have received a copy of the GNU General Public License and
>+a copy of the GCC Runtime Library Exception along with this program;
>+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>+<http://www.gnu.org/licenses/>.  */
>+
>+#ifdef MD_HAVE_COMPACT_EH
>+
>+#define DWARF_SP_REGNO 29
>+
>+#if _MIPS_SIM == _ABIO32
>+#define MIPS_EH_STACK_ALIGN 8
>+#else
>+#define MIPS_EH_STACK_ALIGN 16
>+#endif
>+
>+#define VRF_0 32
>+

The following two functions are missing a brief comment

>+static int
>+record_push (_Unwind_FrameState *fs, int reg, int offset)
>+{
>+  int idx = DWARF_REG_TO_UNWIND_COLUMN (reg);
>+
>+  offset -= dwarf_reg_size_table[idx];
>+  fs->regs.reg[idx].how = REG_SAVED_OFFSET;
>+  fs->regs.reg[idx].loc.offset = offset;
>+  return offset;
>+}
>+
>+static void
>+record_cfa_adjustment (_Unwind_FrameState *fs, _uleb128_t val)
>+{
>+  int i;
>+  fs->regs.cfa_offset += val;
>+  /* In case we see an adjustment after pushes, it means that
>+     the registers aren't saved at the top of the frame, probably
>+     due to a varargs save area.  Adjust the recorded offsets.  */
>+  for (i = 0; i < DWARF_FRAME_REGISTERS + 1; i++)
>+    if (fs->regs.reg[i].how == REG_SAVED_OFFSET)
>+      fs->regs.reg[i].loc.offset -= val;
>+}
>+
>+/* Process the frame unwinding opcodes.  */
>+
>+static _Unwind_Reason_Code
>+md_unwind_compact (struct _Unwind_Context *context ATTRIBUTE_UNUSED,
>+                 _Unwind_FrameState *fs, const unsigned char **pp)
>+{
>+  unsigned char op;
>+  _uleb128_t val;
>+  int push_offset;
>+  int i;
>+  int n;
>+  const unsigned char *p = *pp;
>+
>+  push_offset = 0;
>+  fs->regs.cfa_how = CFA_REG_OFFSET;
>+  fs->regs.cfa_reg = STACK_POINTER_REGNUM;
>+  fs->regs.cfa_offset = 0;
>+  fs->retaddr_column = 31;
>+
>+  while (1)
>+    {
>+      op = *(p++);
>+
>+      if (op < 0x40)

Can we get these magic constants converted to some macros? I think there are 
sensible
names they could be given.

>+      {
>+        /* Increment stack pointer.  */
>+        record_cfa_adjustment (fs, (op + 1) * MIPS_EH_STACK_ALIGN);
>+      }
>+      else if (op < 0x48)
>+      {
>+        /* Push VR[16] to VR[16+x] and VR[31] */
>+        push_offset = record_push (fs, 31, push_offset);
>+        for (i = op & 7; i >= 0; i--)
>+          push_offset = record_push (fs, 16 + i, push_offset);
>+      }
>+      else if (op < 0x50)
>+      {
>+        /* Push VR[16] to VR[16+x], VR[30] and VR[31] */
>+        push_offset = record_push (fs, 31, push_offset);
>+        push_offset = record_push (fs, 30, push_offset);
>+        for (i = op & 7; i >= 0; i--)
>+          push_offset = record_push (fs, 16 + i, push_offset);
>+      }
>+      else if (op < 0x58)
>+      {
>+        /* Restore stack ponter from frame pointer */
>+        fs->regs.cfa_reg = (op & 7) + 16;
>+        fs->regs.cfa_offset = 0;
>+      }
>+      else if (op == 0x58)
>+              {

indentation

>+        /* Large SP increment.  */
>+        p = read_uleb128 (p, &val);
>+        record_cfa_adjustment (fs, (val + 129) * MIPS_EH_STACK_ALIGN);
>+      }
>+      else if (op == 0x59)
>+      {
>+        /* Push VR[x] to VR[x+y] */
>+        op = *(p++);
>+        n = op >> 3;
>+        for (i = op & 7; i >= 0; i--)
>+          push_offset = record_push (fs, n + i, push_offset);
>+      }
>+      else if (op == 0x5a)
>+      {
>+        /* Push VRF[x] to VRF[x+y] */
>+        op = *(p++);
>+        n = (op >> 3) + VRF_0;
>+        for (i = op & 7; i >= 0; i--)
>+          push_offset = record_push (fs, n + i, push_offset);
>+      }
>+      else if (op == 0x5b)
>+      {
>+        /* Restore the CFA to stack pointer.  */
>+        fs->regs.cfa_reg = STACK_POINTER_REGNUM;
>+        fs->regs.cfa_offset = 0;
>+      }
>+

spurious newline.

>+      else if (op == 0x5c)
>+      {
>+        /* Finish.  */
>+        *pp = p;
>+        return _URC_NO_REASON;
>+      }
>+      else if (op == 0x5d)
>+      {
>+        /* No unwind.  */
>+        return _URC_END_OF_STACK;
>+      }
>+      else if (op == 0x5e)
>+      {
>+        /* Restore SP from VR[30] */
>+        fs->regs.cfa_reg = 30;
>+        fs->regs.cfa_offset = 0;
>+      }
>+      else if (op == 0x5f)
>+      {
>+        /* NOP */
>+      }
>+      else if (op >= 0x60 && op < 0x6c)
>+      {
>+        /* Push VRF[20] to VRF[20 + x] */
>+        for (i = op & 0xf; i >= 0; i--)
>+          push_offset = record_push (fs, VRF_0 + 20 + i, push_offset);
>+      }
>+      else if (op >= 0x6c && op < 0x70)
>+      {
>+        /* MIPS16 push VR[16], VR[17], VR[18+x]-VR[23], VR[31] */
>+        push_offset = record_push (fs, 31, push_offset);
>+        for (i = 23; i >= 18 + (op & 3); i--)
>+          push_offset = record_push (fs, i, push_offset);
>+        push_offset = record_push (fs, 17, push_offset);
>+        push_offset = record_push (fs, 16, push_offset);
>+      }
>+      else
>+      {
>+        return _URC_FATAL_PHASE1_ERROR;
>+      }

No braces required above.

>+    }
>+}
>+
>+#endif /* MD_HAVE_COMPACT_EH */
>Index: libgcc/config/mips/mips16.S
>===================================================================
>--- libgcc/config/mips/mips16.S        (revision 226409)
>+++ libgcc/config/mips/mips16.S        (working copy)
>@@ -624,6 +624,18 @@ CALL_STUB_NO_RET (__mips16_call_stub_10, 10)
>    being called is 16 bits, in which case the copy is unnecessary;
>    however, it's faster to always do the copy.  */
> 
>+#ifdef __GNU_COMPACT_EH__
>+#define CALL_STUB_RET(NAME, CODE, MODE)       \
>+STARTFN (NAME);                               \
>+      move    $18,$31;                \
>+      STUB_ARGS_##CODE;               \
>+      .set    noreorder;              \
>+      jalr    $2;                     \
>+      move    $25,$2;                 \
>+      .set    reorder;                \
>+      MOVE_##MODE##_RET (f, $18);     \
>+      ENDFN (NAME)
>+#else

Does this mean you can't unwind through a MIPS16 stub with compact EH?

>Index: gcc/doc/invoke.texi
>===================================================================
>--- gcc/doc/invoke.texi        (revision 226409)
>+++ gcc/doc/invoke.texi        (working copy)
>@@ -814,7 +814,7 @@ Objective-C and Objective-C++ Dialects}.
> -mbranch-cost=@var{num}  -mbranch-likely  -mno-branch-likely @gol
> -mfp-exceptions -mno-fp-exceptions @gol
> -mvr4130-align -mno-vr4130-align -msynci -mno-synci @gol
>--mrelax-pic-calls -mno-relax-pic-calls -mmcount-ra-address}
>+-mrelax-pic-calls -mno-relax-pic-calls -mmcount-ra-address -mno-compact-eh}

Normally both the positive and negative options are quoted ih here.

>Index: gcc/config/mips/mips.opt
>===================================================================
>--- gcc/config/mips/mips.opt   (revision 226409)
>+++ gcc/config/mips/mips.opt   (working copy)
>@@ -99,6 +99,10 @@ Enum(mips_code_readable_setting) String(pcrel) Val
> EnumValue
> Enum(mips_code_readable_setting) String(no) Value(CODE_READABLE_NO)
> 
>+mcompact-eh
>+Target Var(TARGET_COMPACT_EH) Init(1)
>+Use compact exception unwinding tables.
>+

Should this be a MIPS specific option? Given there is also control for each
backend to define MD_HAVE_COMPACT_EH then I'd have thought this would be a
-f option.

>Index: gcc/config/mips/mips.c
>===================================================================
>--- gcc/config/mips/mips.c     (revision 226409)
>+++ gcc/config/mips/mips.c     (working copy)
>@@ -77,6 +77,7 @@ along with GCC; see the file COPYING3.  If not see
> #include "cgraph.h"
> #include "builtins.h"
> #include "rtl-iter.h"
>+#include "except.h"
> 
> /* This file should be included last.  */
> #include "target-def.h"
>@@ -693,6 +694,8 @@ static mips_one_only_stub *mips16_rdhwr_stub;
> static mips_one_only_stub *mips16_get_fcsr_stub;
> static mips_one_only_stub *mips16_set_fcsr_stub;
> 
>+static bool done_cfi_sections;
>+
> /* Index R is the smallest register class that contains register R.  */
> const enum reg_class mips_regno_to_class[FIRST_PSEUDO_REGISTER] = {
>   LEA_REGS,        LEA_REGS,        M16_STORE_REGS,  V1_REG,
>@@ -6669,6 +6672,40 @@ mips_start_unique_function (const char *name)
>   putc ('\n', asm_out_file);
> }
> 
>+/* The LTO frontend only enables exceptions when it sees a function that
>+   uses it.  This changes the return value of dwarf2out_do_frame, so we
>+   have to check before every function.  */
>+
>+void
>+mips_fixup_cfi_sections (void)
>+{
>+#ifdef MD_HAVE_COMPACT_EH
>+  if (done_cfi_sections)
>+    return;
>+
>+  if (!TARGET_COMPACT_EH)
>+    return;
>+
>+  /* Output a .cfi_sections directive.  */
>+  if (dwarf2out_do_frame ())
>+    {
>+      if (flag_unwind_tables || flag_exceptions)
>+      {
>+        if (write_symbols == DWARF2_DEBUG
>+            || write_symbols == VMS_AND_DWARF2_DEBUG)
>+          fprintf (asm_out_file,
>+                   "\t.cfi_sections .debug_frame, .eh_frame_entry\n");
>+        else
>+          fprintf (asm_out_file, "\t.cfi_sections .eh_frame_entry\n");
>+      }
>+      else
>+      fprintf (asm_out_file, "\t.cfi_sections .debug_frame\n");
>+      done_cfi_sections = true;
>+    }
>+#endif
>+}
>+
>+

Double newline.  This code does not seem MIPS specific and is likely to be
duplicated for any other target with compact-eh.  I'm not going to insist on
moving it out of here but I hope that when another arch uses compact-eh then
this code will be moved.

> /* Start a definition of function NAME.  MIPS16_P indicates whether the
>    function contains MIPS16 code.  */
> 
>@@ -7252,7 +7289,13 @@ mips16_build_call_stub (rtx retval, rtx *fn_ptr, r
> 
>         /* "Save" $sp in itself so we don't use the fake CFA.
>            This is: DW_CFA_val_expression r29, { DW_OP_reg29 }.  */

This comment looks like it ends up duplicated below.

>-        fprintf (asm_out_file, "\t.cfi_escape 0x16,29,1,0x6d\n");
>+        if (dwarf2out_do_frame ()
>+            && (flag_unwind_tables || flag_exceptions))
>+          /* "Save" $sp in itself so we don't use the fake CFA.
>+             This is: DW_CFA_val_expression r29, { DW_OP_reg29 }.  */
>+          fprintf (asm_out_file, "\t.cfi_fde_data 0x5b\n");
>+        else
>+          fprintf (asm_out_file, "\t.cfi_escape 0x16,29,1,0x6d\n");
> 
>         /* Save the return address in $18.  The stub's caller knows
>            that $18 might be clobbered, even though $18 is usually
>@@ -8850,6 +8893,19 @@ mips_function_rodata_section (tree decl)
>   return data_section;
> }
> 
>+/* Implement TARGET_ASM_INIT_SECTIONS.  */
>+
>+static void
>+mips_asm_init_sections (void)
>+{
>+  if (TARGET_COMPACT_EH)
>+    {
>+      /* Let the assembler decide where to put the LSDA.  */
>+      exception_section = get_unnamed_section (0, output_section_asm_op,
>+                                             "\t.cfi_inline_lsda 2");
>+    }
>+}

I'm not sure this is really MIPS specific either. Same comment as before.

>@@ -12175,6 +12240,175 @@ mips_expand_epilogue (bool sibcall_p)
>       emit_insn_before (gen_mips_ehb (), insn);
>     }
> }
>+
>+/* Add an opcode to *FDE to describe the necessary pushes of r30/r31, if
>+   any.  */
>+static void
>+add_fp_ra_push (vec<uchar, va_gc> **fde)
>+{
>+  bool fp_needed = BITSET_P (cfun->machine->frame.mask, 30);
>+  if (!BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM)
>+      && !fp_needed)
>+    return;
>+
>+  vec_safe_push (*fde, (uchar) 0x59);

Magic number again here that could have a meaningful name.

>+  if (fp_needed)
>+    vec_safe_push (*fde, (uchar) ((30 << 3) | 1));
>+  else
>+    vec_safe_push (*fde, uchar (31 << 3));

Does this compile? Shouldn't it be (uchar)?

>+}
>+
>+/* Add an opcode to *FDE to describe an adjustment of the CFA register by
>+   AMOUNT, given an assumed alignment ALIGN.  */
>+static void
>+mips_fdedata_cfaadjust (vec <uchar, va_gc> **fde,
>+                      HOST_WIDE_INT amount, int align)
>+{
>+  if (amount == 0)
>+    return;
>+
>+  gcc_assert (amount % align == 0);
>+  if (amount >= align * 129)
>+    {
>+      vec_safe_push (*fde, (uchar) 0x58);

Magic constants.

>+      push_uleb128 (fde, amount / align - 129);
>+    }
>+  else if (amount >= align * 65)
>+    {
>+      vec_safe_push (*fde, (uchar) 0x3f);
>+      vec_safe_push (*fde, (uchar) (amount / align - 64));
>+    }
>+  else
>+    vec_safe_push (*fde, (uchar) (amount / align - 1));
>+}
>+
>+/* Implements TARGET_ASM_OUTPUT_CFI_ENDPROC.  Emit the .cfi_endproc
>+   directive, and when emitting unwind information, also emit the
>+   .cfi_fde_data directive.  */
>+static void
>+mips_cfi_endproc (void)
>+{
>+  int align = TARGET_NEWABI ? 16 : 8;

Is this stack alignment? If so (or anything else that relates to ABI) then
there should be a macro you can reuse for this instead of inferring it from
TARGET_NEWABI.

There should be a check somewhere to inhibit the use of compact-eh with
anything other than o32/n32/n64 I think as there are many assumptions about
the ABI being one of these.

This function could do with a lot more comments to explain the process like
a step by step description of what is handled at each point.

>+  vec<uchar, va_gc> *fde = NULL;
>+  int i, len, n;
>+  HOST_WIDE_INT total, push_base;
>+  bool any_saved = false;
>+
>+  if (!TARGET_COMPACT_EH
>+      || (!flag_unwind_tables && !flag_exceptions)
>+      || flag_asynchronous_unwind_tables)
>+    {
>+      fprintf (asm_out_file, ".cfi_endproc\n");
>+      return;
>+    }
>+
>+  fprintf (asm_out_file, "\t.cfi_fde_data ");
>+
>+  total = cfun->machine->frame.total_size;
>+  if (cfun->machine->frame.num_fp > 0)
>+    push_base = cfun->machine->frame.fp_sp_offset + UNITS_PER_HWFPVALUE;
>+  else if (cfun->machine->frame.num_gp > 0)
>+    push_base = cfun->machine->frame.gp_sp_offset + UNITS_PER_WORD;
>+  else
>+    push_base = total;
>+
>+  if (frame_pointer_needed)
>+    {
>+      if (HARD_FRAME_POINTER_REGNUM == 30)
>+      vec_safe_push (fde, (uchar) 0x5E);
>+      else
>+      {
>+        int t = HARD_FRAME_POINTER_REGNUM - 16;
>+        gcc_assert (t < 8);
>+        vec_safe_push (fde, (uchar) (0x50 + t));
>+      }
>+      total -= cfun->machine->frame.hard_frame_pointer_offset;
>+      push_base -= cfun->machine->frame.hard_frame_pointer_offset;
>+    }
>+
>+  if (HARD_FRAME_POINTER_REGNUM == 30 && frame_pointer_needed)
>+    gcc_assert (BITSET_P (cfun->machine->frame.mask, 30));
>+
>+  mips_fdedata_cfaadjust (&fde, push_base, align);
>+  total -= push_base;
>+
>+  n = 0;
>+  for (i = 31; i > 19; i--)
>+    {
>+      bool isset = BITSET_P (cfun->machine->frame.fmask, i);
>+      if (isset)
>+      n++;
>+      if ((i == 20 || !isset) && n > 0)
>+      {
>+        int first = i + (isset ? 0 : 1);
>+        if (first == 20)
>+          {
>+            if (n > 8)
>+              vec_safe_push (fde, (uchar) (0x68 + n - 9));
>+            else
>+              vec_safe_push (fde, (uchar) (0x60 + n - 1));
>+          }
>+        else
>+          {
>+            if (n > 8)
>+              {
>+                vec_safe_push (fde, (uchar) 0x5a);
>+                vec_safe_push (fde, (uchar) (((first + 8) << 3) | (n - 9)));
>+                n = 8;
>+              }
>+            vec_safe_push (fde, (uchar) 0x5a);
>+            vec_safe_push (fde, (uchar) ((first << 3) | (n - 1)));
>+          }
>+        n = 0;
>+      }
>+    }
>+  gcc_assert (n == 0);
>+
>+  for (i = 23; i > 0; i--)
>+    {
>+      bool isset = BITSET_P (cfun->machine->frame.mask, i);
>+      if (isset)
>+      n++;
>+      if ((i == 16 || i == 1 || !isset) && n > 0)
>+      {
>+        int first = i + (isset ? 0 : 1);
>+        if (first == 16 && !any_saved
>+            && BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM))
>+          {
>+            if (BITSET_P (cfun->machine->frame.mask, 30))
>+              vec_safe_push (fde, (uchar) (0x48 + n - 1));
>+            else
>+              vec_safe_push (fde, (uchar) (0x40 + n - 1));
>+          }
>+        else
>+          {
>+            if (!any_saved)
>+              add_fp_ra_push (&fde);
>+            vec_safe_push (fde, (uchar) 0x59);
>+            vec_safe_push (fde, (uchar) ((first << 3) | (n - 1)));
>+          }
>+        any_saved = true;
>+        n = 0;
>+      }
>+    }
>+  gcc_assert (n == 0);
>+
>+  if (!any_saved)
>+    add_fp_ra_push (&fde);
>+
>+  /* Skip varargs save area and suchlike.  */
>+  mips_fdedata_cfaadjust (&fde, total, align);
>+
>+  /* The assembler appends a "Finish" opcode for out-of-line entries;
>+     the unwind code (uw_frame_state_compact) for inline entries.  */
>+
>+  len = vec_safe_length (fde);
>+  for (i = 0; i < len; i++)
>+    fprintf (asm_out_file, "0x%x%s", (*fde)[i],
>+           i + 1 == len ? "" : ",");
>+  fputc ('\n', asm_out_file);
>+  fprintf (asm_out_file, "\t.cfi_endproc\n");
>+}
> 
> /* Return nonzero if this function is known to have a null epilogue.
>    This allows the optimizer to omit jumps to jumps if no stack

RE: [RFA] Compact EH Patch

Reply via email to