Now generates RTL with appropriate stack restore and leave patterns.  Slightly
cleaned up code that calculates the number of vector elements for clarity.

Tests are good when rebased onto gcc-7_1_0-release as HEAD currently fails to
bootstrap.

Signed-off-by: Daniel Santos <daniel.san...@pobox.com>
---
 gcc/config/i386/i386.c | 287 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 278 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f2772b2d10e..e43dc819f9a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14148,6 +14148,78 @@ ix86_elim_entry_set_got (rtx reg)
     }
 }
 
+static rtx
+gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
+{
+  rtx addr, mem;
+
+  if (offset)
+    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
+  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
+  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
+}
+
+static inline rtx
+gen_frame_load (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, false);
+}
+
+static inline rtx
+gen_frame_store (rtx reg, rtx frame_reg, int offset)
+{
+  return gen_frame_set (reg, frame_reg, offset, true);
+}
+
+static void
+ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+                         + m->call_ms2sysv_extra_regs;
+  rtvec v = rtvec_alloc (ncregs + 1);
+  unsigned int align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, addr;
+  rtx rax = gen_rtx_REG (word_mode, AX_REG);
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT rax_offset = xlogue.get_stub_ptr_offset () + m->fs.sp_offset;
+  HOST_WIDE_INT stack_alloc_size = frame.stack_pointer_offset - 
m->fs.sp_offset;
+  HOST_WIDE_INT stack_align_off_in = xlogue.get_stack_align_off_in ();
+
+  /* Verify that the incoming stack 16-byte alignment offset matches the
+     layout we're using.  */
+  gcc_assert (stack_align_off_in == (m->fs.sp_offset & UNITS_PER_WORD));
+
+  /* Get the stub symbol.  */
+  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
+                                                 : XLOGUE_STUB_SAVE);
+  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+
+  /* Setup RAX as the stub's base pointer.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  addr = choose_baseaddr (rax_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  insn = emit_insn (gen_rtx_SET (rax, addr));
+
+  gcc_assert (stack_alloc_size >= xlogue.get_stack_space_used ());
+  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                            GEN_INT (-stack_alloc_size), -1,
+                            m->fs.cfa_reg == stack_pointer_rtx);
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
+                            r.regno);
+      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);;
+    }
+
+  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
+
+  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
+  RTX_FRAME_RELATED_P (insn) = true;
+}
+
 /* Expand the prologue into a bunch of separate insns.  */
 
 void
@@ -14395,7 +14467,7 @@ ix86_expand_prologue (void)
         performing the actual alignment.  Otherwise we cannot guarantee
         that there's enough storage above the realignment point.  */
       allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset;
-      if (allocate)
+      if (allocate && !m->call_ms2sysv)
         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (-allocate), -1, false);
 
@@ -14403,7 +14475,6 @@ ix86_expand_prologue (void)
       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));
-
       /* For the purposes of register save area addressing, the stack
         pointer can no longer be used to access anything in the frame
         below m->fs.sp_realigned_offset and the frame pointer cannot be
@@ -14420,6 +14491,9 @@ ix86_expand_prologue (void)
        m->fs.sp_valid = false;
     }
 
+  if (m->call_ms2sysv)
+    ix86_emit_outlined_ms2sysv_save (frame);
+
   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
 
   if (flag_stack_usage_info)
@@ -14740,17 +14814,19 @@ ix86_emit_restore_regs_using_pop (void)
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
+    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
 }
 
-/* Emit code and notes for the LEAVE instruction.  */
+/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
+   omits the emit and only attaches the notes.  */
 
 static void
-ix86_emit_leave (void)
+ix86_emit_leave (rtx_insn *insn)
 {
   struct machine_function *m = cfun->machine;
-  rtx_insn *insn = emit_insn (ix86_gen_leave ());
+  if (!insn)
+    insn = emit_insn (ix86_gen_leave ());
 
   ix86_add_queued_cfa_restore_notes (insn);
 
@@ -14844,6 +14920,164 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT 
cfa_offset,
       }
 }
 
+static void
+ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
+                                 bool use_call, int style)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+                         + m->call_ms2sysv_extra_regs;
+  rtvec v;
+  unsigned int elems_needed, align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, tmp;
+  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
+  rtx r10 = NULL_RTX;
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
+  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
+  rtx rsi_frame_load = NULL_RTX;
+  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
+  enum xlogue_stub stub;
+
+  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
+
+  /* If using a realigned stack, we should never start with padding.  */
+  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
+
+  /* Setup RSI as the stub's base pointer.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  tmp = choose_baseaddr (rsi_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  emit_insn (gen_rtx_SET (rsi, tmp));
+
+  /* Get a symbol for the stub.  */
+  if (frame_pointer_needed)
+    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
+                   : XLOGUE_STUB_RESTORE_HFP_TAIL;
+  else
+    stub = use_call ? XLOGUE_STUB_RESTORE
+                   : XLOGUE_STUB_RESTORE_TAIL;
+  sym = xlogue.get_stub_rtx (stub);
+
+  elems_needed = ncregs;
+  if (use_call)
+    elems_needed += 1;
+  else
+    elems_needed += frame_pointer_needed ? 5 : 3;
+  v = rtvec_alloc (elems_needed);
+
+  /* We call the epilogue stub when we need to pop incoming args or we are
+     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
+     epilogue stub and it is the tail-call.  */
+  if (use_call)
+      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+  else
+    {
+      RTVEC_ELT (v, vi++) = ret_rtx;
+      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+      if (frame_pointer_needed)
+       {
+         rtx rbp = gen_rtx_REG (DImode, BP_REG);
+         gcc_assert (m->fs.fp_valid);
+         gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
+
+         tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
+         RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
+         RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
+         tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+         RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
+       }
+      else
+       {
+         /* If no hard frame pointer, we set R10 to the SP restore value.  */
+         gcc_assert (!m->fs.fp_valid);
+         gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+         gcc_assert (m->fs.sp_valid);
+
+         r10 = gen_rtx_REG (DImode, R10_REG);
+         tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
+         emit_insn (gen_rtx_SET (r10, tmp));
+
+         RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
+       }
+    }
+
+  /* Generate frame load insns and restore notes.  */
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      enum machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
+      rtx reg, frame_load;
+
+      reg = gen_rtx_REG (mode, r.regno);
+      frame_load = gen_frame_load (reg, rsi, r.offset);
+
+      /* Save RSI frame load insn & note to add last.  */
+      if (r.regno == SI_REG)
+       {
+         gcc_assert (!rsi_frame_load);
+         rsi_frame_load = frame_load;
+         rsi_restore_offset = r.offset;
+       }
+      else
+       {
+         RTVEC_ELT (v, vi++) = frame_load;
+         ix86_add_cfa_restore_note (NULL, reg, r.offset);
+       }
+    }
+
+  /* Add RSI frame load & restore note at the end.  */
+  gcc_assert (rsi_frame_load);
+  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
+  RTVEC_ELT (v, vi++) = rsi_frame_load;
+  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
+                            rsi_restore_offset);
+
+  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
+  if (!use_call && !frame_pointer_needed)
+    {
+      gcc_assert (m->fs.sp_valid);
+      gcc_assert (!m->fs.sp_realigned);
+
+      /* At this point, R10 should point to frame.stack_realign_offset.  */
+      if (m->fs.cfa_reg == stack_pointer_rtx)
+       m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
+      m->fs.sp_offset = frame.stack_realign_offset;
+    }
+
+  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
+  tmp = gen_rtx_PARALLEL (VOIDmode, v);
+  if (use_call)
+      insn = emit_insn (tmp);
+  else
+    {
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = ret_rtx;
+
+      if (frame_pointer_needed)
+       ix86_emit_leave (insn);
+      else
+       {
+         /* Need CFA adjust note.  */
+         tmp = gen_rtx_SET (stack_pointer_rtx, r10);
+         add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
+       }
+    }
+
+  RTX_FRAME_RELATED_P (insn) = true;
+  ix86_add_queued_cfa_restore_notes (insn);
+
+  /* If we're not doing a tail-call, we need to adjust the stack.  */
+  if (use_call && m->fs.sp_valid)
+    {
+      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
+      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                               GEN_INT (dealloc), style,
+                               m->fs.cfa_reg == stack_pointer_rtx);
+    }
+}
+
 /* Restore function stack, frame, and registers.  */
 
 void
@@ -14854,6 +15088,7 @@ ix86_expand_epilogue (int style)
   struct ix86_frame frame;
   bool restore_regs_via_mov;
   bool using_drap;
+  bool restore_stub_is_tail = false;
 
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout (&frame);
@@ -14956,7 +15191,37 @@ ix86_expand_epilogue (int style)
     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
                                          style == 2);
 
-  if (restore_regs_via_mov)
+  if (m->call_ms2sysv)
+    {
+      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
+
+      /* We cannot use a tail-call for the stub if:
+        1. We have to pop incoming args,
+        2. We have additional int regs to restore, or
+        3. A sibling call will be the tail-call, or
+        4. We are emitting an eh_return_internal epilogue.
+
+        TODO: Item 4 has not yet tested!
+
+        If any of the above are true, we will call the stub rather than
+        jump to it.  */
+      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
+      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
+    }
+
+  /* If using out-of-line stub that is a tail-call, then...*/
+  if (m->call_ms2sysv && restore_stub_is_tail)
+    {
+      /* TODO: parinoid tests. (remove eventually)  */
+      gcc_assert (m->fs.sp_valid);
+      gcc_assert (!m->fs.sp_realigned);
+      gcc_assert (!m->fs.fp_valid);
+      gcc_assert (!m->fs.realigned);
+      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
+      gcc_assert (!crtl->drap_reg);
+      gcc_assert (!frame.nregs);
+    }
+  else if (restore_regs_via_mov)
     {
       rtx t;
 
@@ -15087,7 +15352,7 @@ ix86_expand_epilogue (int style)
       else if (TARGET_USE_LEAVE
               || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
               || !cfun->machine->use_fast_prologue_epilogue)
-       ix86_emit_leave ();
+       ix86_emit_leave (NULL);
       else
         {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
@@ -15198,7 +15463,7 @@ ix86_expand_epilogue (int style)
       else
        emit_jump_insn (gen_simple_return_pop_internal (popc));
     }
-  else
+  else if (!m->call_ms2sysv || !restore_stub_is_tail)
     emit_jump_insn (gen_simple_return_internal ());
 
   /* Restore the state back to the state from the prologue,
@@ -28927,6 +29192,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 
          clobber_reg (&use, gen_rtx_REG (mode, regno));
        }
+
+      /* Set here, but it may get cleared later.  */
+      if (TARGET_CALL_MS2SYSV_XLOGUES)
+       cfun->machine->call_ms2sysv = true;
     }
 
   if (vec_len > 1)
-- 
2.11.0

Reply via email to