https://gcc.gnu.org/g:eb94eb73cf3993c1d544e6eb8c4dcb671f215b25

commit eb94eb73cf3993c1d544e6eb8c4dcb671f215b25
Author: Michael Matz <m...@suse.de>
Date:   Sun Jun 30 03:52:39 2024 +0200

    x86: implement separate shrink wrapping

Diff:
---
 gcc/config/i386/i386.cc | 581 +++++++++++++++++++++++++++++++++++++++++++-----
 gcc/config/i386/i386.h  |   2 +
 2 files changed, 533 insertions(+), 50 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4b6b665e5997..33e69e96008d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -6970,7 +6970,7 @@ ix86_compute_frame_layout (void)
     }
 
   frame->save_regs_using_mov
-    = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
+    = (TARGET_PROLOGUE_USING_MOVE || flag_shrink_wrap_separate) && m->use_fast_prologue_epilogue;
 
   /* Skip return address and error code in exception handler.  */
   offset = INCOMING_FRAME_SP_OFFSET;
@@ -7120,7 +7120,8 @@ ix86_compute_frame_layout (void)
   /* Size prologue needs to allocate.  */
   to_allocate = offset - frame->sse_reg_save_offset;
 
-  if ((!to_allocate && frame->nregs <= 1)
+  if ((!to_allocate && frame->nregs <= 1
+       && !flag_shrink_wrap_separate)
       || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
        /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
@@ -7417,6 +7418,8 @@ ix86_emit_save_regs (void)
   int regno;
   rtx_insn *insn;
 
+  gcc_assert (!crtl->shrink_wrapped_separate);
+
   if (!TARGET_APX_PUSH2POP2
       || !ix86_can_use_push2pop2 ()
       || cfun->machine->func_type != TYPE_NORMAL)
@@ -7589,7 +7592,8 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
       {
-        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+       if (!cfun->machine->reg_wrapped_separately[regno])
+         ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
       }
 }
@@ -7604,7 +7608,8 @@ ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
       {
-       ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
+       if (!cfun->machine->reg_wrapped_separately[regno])
+         ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
        cfa_offset -= GET_MODE_SIZE (V4SFmode);
       }
 }
@@ -9089,6 +9094,7 @@ ix86_expand_prologue (void)
        = frame.sse_reg_save_offset - frame.reg_save_offset;
 
       gcc_assert (int_registers_saved);
+      gcc_assert (!m->frame_alloc_separately);
 
       /* No need to do stack checking as the area will be immediately
         written.  */
@@ -9106,6 +9112,7 @@ ix86_expand_prologue (void)
       && flag_stack_clash_protection
       && !ix86_target_stack_probe ())
     {
+      gcc_assert (!m->frame_alloc_separately);
       ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
       allocate = 0;
     }
@@ -9116,6 +9123,7 @@ ix86_expand_prologue (void)
     {
       const HOST_WIDE_INT probe_interval = get_probe_interval ();
 
+      gcc_assert (!m->frame_alloc_separately);
       if (STACK_CHECK_MOVING_SP)
        {
          if (crtl->is_leaf
@@ -9172,9 +9180,16 @@ ix86_expand_prologue (void)
   else if (!ix86_target_stack_probe ()
           || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
     {
-      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-                                GEN_INT (-allocate), -1,
-                                m->fs.cfa_reg == stack_pointer_rtx);
+      if (!m->frame_alloc_separately)
+       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                                  GEN_INT (-allocate), -1,
+                                  m->fs.cfa_reg == stack_pointer_rtx);
+      else
+       {
+         if (m->fs.cfa_reg == stack_pointer_rtx)
+           m->fs.cfa_offset -= allocate;
+         m->fs.sp_offset += allocate;
+       }
     }
   else
     {
@@ -9184,6 +9199,8 @@ ix86_expand_prologue (void)
       bool eax_live = ix86_eax_live_at_start_p ();
       bool r10_live = false;
 
+      gcc_assert (!m->frame_alloc_separately);
+
       if (TARGET_64BIT)
         r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
 
@@ -9338,6 +9355,7 @@ ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
   struct machine_function *m = cfun->machine;
   rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
 
+  gcc_assert (!m->reg_wrapped_separately[REGNO (reg)]);
   ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
   m->fs.sp_offset -= UNITS_PER_WORD;
 
@@ -9396,6 +9414,9 @@ ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
   const int offset = UNITS_PER_WORD * 2;
   rtx_insn *insn;
 
+  gcc_assert (!m->reg_wrapped_separately[REGNO (reg1)]
+             && !m->reg_wrapped_separately[REGNO (reg2)]);
+
   rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
 
@@ -9468,6 +9489,7 @@ ix86_emit_restore_regs_using_pop (bool ppx_p)
 {
   unsigned int regno;
 
+  gcc_assert (!crtl->shrink_wrapped_separate);
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
@@ -9484,6 +9506,7 @@ ix86_emit_restore_regs_using_pop2 (void)
   int loaded_regnum = 0;
   bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
 
+  gcc_assert (!crtl->shrink_wrapped_separate);
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
       {
@@ -9551,42 +9574,72 @@ ix86_emit_leave (rtx_insn *insn)
                             m->fs.fp_offset);
 }
 
+/* Emit a MOV that reloads hard register REGNO from its save slot at
+   CFA - CFA_OFFSET.  General registers are loaded in word_mode and SSE
+   registers in V4SFmode; passing any other register is a bug in the
+   caller.  If CFI_HERE, the CFA restore note is attached to the emitted
+   insn; otherwise NULL is handed to ix86_add_cfa_restore_note in its
+   place.  */
+static void
+ix86_emit_restore_reg_using_mov (unsigned regno, HOST_WIDE_INT cfa_offset,
+                                bool cfi_here)
+{
+  if (GENERAL_REGNO_P (regno))
+    {
+      struct machine_function *m = cfun->machine;
+      rtx reg = gen_rtx_REG (word_mode, regno);
+      rtx mem = gen_frame_mem (word_mode, choose_baseaddr (cfa_offset, NULL));
+      rtx_insn *insn = emit_move_insn (reg, mem);
+
+      if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+       {
+         /* Previously we'd represented the CFA as an expression
+            like *(%ebp - 8).  We've just reloaded that value from
+            the stack, which means we need to reset the CFA to
+            the drap register.  This will remain until we restore
+            the stack pointer.  */
+         add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+         RTX_FRAME_RELATED_P (insn) = 1;
+
+         /* This means that the DRAP register is valid for addressing.  */
+         m->fs.drap_valid = true;
+       }
+      else
+       ix86_add_cfa_restore_note (cfi_here ? insn : NULL, reg, cfa_offset);
+    }
+  else if (SSE_REGNO_P (regno))
+    {
+      unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
+      rtx reg = gen_rtx_REG (V4SFmode, regno);
+      rtx mem = gen_rtx_MEM (V4SFmode, choose_baseaddr (cfa_offset, &align));
+
+      /* The location alignment depends upon the base register, so use
+        the smaller of the mode's alignment and what choose_baseaddr
+        reported.  */
+      align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
+      gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
+      set_mem_align (mem, align);
+      rtx_insn *insn = emit_insn (gen_rtx_SET (reg, mem));
+
+      ix86_add_cfa_restore_note (cfi_here ? insn : NULL, reg, cfa_offset);
+    }
+  else
+    /* Only general and SSE registers have save slots handled here.  */
+    gcc_unreachable ();
+}
+
 /* Emit code to restore saved registers using MOV insns.
    First register is restored from CFA - CFA_OFFSET.  */
 static void
 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
 {
-  struct machine_function *m = cfun->machine;
   unsigned int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
       {
-       rtx reg = gen_rtx_REG (word_mode, regno);
-       rtx mem;
-       rtx_insn *insn;
-
-       mem = choose_baseaddr (cfa_offset, NULL);
-       mem = gen_frame_mem (word_mode, mem);
-       insn = emit_move_insn (reg, mem);
-
-        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
-         {
-           /* Previously we'd represented the CFA as an expression
-              like *(%ebp - 8).  We've just popped that value from
-              the stack, which means we need to reset the CFA to
-              the drap register.  This will remain until we restore
-              the stack pointer.  */
-           add_reg_note (insn, REG_CFA_DEF_CFA, reg);
-           RTX_FRAME_RELATED_P (insn) = 1;
-
-           /* This means that the DRAP register is valid for addressing.  */
-           m->fs.drap_valid = true;
-         }
-       else
-         ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
-
+       if (!cfun->machine->reg_wrapped_separately[regno])
+         ix86_emit_restore_reg_using_mov (regno, cfa_offset, false);
        cfa_offset -= UNITS_PER_WORD;
       }
 }
@@ -9602,21 +9655,8 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
       {
-       rtx reg = gen_rtx_REG (V4SFmode, regno);
-       rtx mem;
-       unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
-
-       mem = choose_baseaddr (cfa_offset, &align);
-       mem = gen_rtx_MEM (V4SFmode, mem);
-
-       /* The location aligment depends upon the base register.  */
-       align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
-       gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
-       set_mem_align (mem, align);
-       emit_insn (gen_rtx_SET (reg, mem));
-
-       ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
-
+       if (!cfun->machine->reg_wrapped_separately[regno])
+         ix86_emit_restore_reg_using_mov (regno, cfa_offset, false);
        cfa_offset -= GET_MODE_SIZE (V4SFmode);
       }
 }
@@ -9854,6 +9894,11 @@ ix86_expand_epilogue (int style)
   /* EH_RETURN requires the use of moves to function properly.  */
   if (crtl->calls_eh_return)
     restore_regs_via_mov = true;
+  else if (crtl->shrink_wrapped_separate)
+    {
+      gcc_assert (!TARGET_SEH);
+      restore_regs_via_mov = true;
+    }
   /* SEH requires the use of pops to identify the epilogue.  */
   else if (TARGET_SEH)
     restore_regs_via_mov = false;
@@ -9888,6 +9933,7 @@ ix86_expand_epilogue (int style)
          && sp_valid_at (frame.stack_realign_offset + 1)
          && (frame.nsseregs + frame.nregs) != 0)
        {
+         gcc_assert (!m->frame_alloc_separately);
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.sse_reg_save_offset),
@@ -9945,6 +9991,7 @@ ix86_expand_epilogue (int style)
          rtx sa = EH_RETURN_STACKADJ_RTX;
          rtx_insn *insn;
 
+         gcc_assert (!m->frame_alloc_separately);
          /* Stack realignment doesn't work with eh_return.  */
          if (crtl->stack_realign_needed)
            sorry ("Stack realignment not supported with "
@@ -10017,6 +10064,7 @@ ix86_expand_epilogue (int style)
     }
   else
     {
+      gcc_assert (!m->frame_alloc_separately);
       /* SEH requires that the function end with (1) a stack adjustment
         if necessary, (2) a sequence of pops, and (3) a return or
         jump instruction.  Prevent insns from the function body from
@@ -10069,6 +10117,7 @@ ix86_expand_epilogue (int style)
      then do so now.  */
   if (m->fs.fp_valid)
     {
+      gcc_assert (!m->frame_alloc_separately);
       /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
       if (sp_valid_at (frame.hfp_save_offset)
@@ -10095,6 +10144,7 @@ ix86_expand_epilogue (int style)
       rtx_insn *insn;
 
       gcc_assert (stack_realign_drap);
+      gcc_assert (!m->frame_alloc_separately);
 
       if (ix86_static_chain_on_stack)
        param_ptr_offset += UNITS_PER_WORD;
@@ -10130,9 +10180,10 @@ ix86_expand_epilogue (int style)
   gcc_assert (!m->fs.realigned);
   if (m->fs.sp_offset != UNITS_PER_WORD)
     {
-      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-                                GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
-                                style, true);
+      if (!m->frame_alloc_separately)
+       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                                  GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
+                                  style, true);
     }
   else
     ix86_add_queued_cfa_restore_notes (get_last_insn ());
@@ -10730,6 +10781,436 @@ ix86_live_on_entry (bitmap regs)
     }
 }
 
+/* Separate shrink-wrapping.  */
+/* Size of the component bitmaps: one bit for each hard register number
+   plus one extra bit, SW_FRAME.  */
+#define NCOMPONENTS (FIRST_PSEUDO_REGISTER + 1)
+/* Index of the extra component bit.  When it is set,
+   ix86_emit_prologue_components emits the stack frame allocation and
+   ix86_emit_epilogue_components emits the deallocation.  */
+#define SW_FRAME FIRST_PSEUDO_REGISTER
+
+/* Return true if the stack frame allocation may be treated as a
+   separately shrink-wrapped component (SW_FRAME) in the current
+   function.  ix86_alloc_frame only implements a plain stack pointer
+   adjustment and aborts on everything else, so every configuration
+   that needs more than that is rejected here.  */
+static bool
+separate_frame_alloc_p (void)
+{
+  struct machine_function *m = cfun->machine;
+
+  /* Frame pointer, SEH, stack realignment and ms2sysv prologues are
+     not handled by the separate allocation code.  */
+  if (frame_pointer_needed
+      || TARGET_SEH
+      || crtl->stack_realign_needed
+      || m->call_ms2sysv)
+    return false;
+
+  /* Allocations that may have to probe the stack (stack clash
+     protection, static stack checking, targets with a stack probe
+     worker) are not implemented by ix86_alloc_frame.  This is
+     conservative for probe targets with small frames.  */
+  if (flag_stack_clash_protection
+      || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+      || ix86_target_stack_probe ())
+    return false;
+
+  return true;
+}
+
+/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  Return a newly
+   allocated bitmap of NCOMPONENTS bits with a bit set for every hard
+   register for which ix86_save_reg (regno, true, true) holds, plus
+   SW_FRAME when separate_frame_alloc_p allows it.  Return NULL when
+   the frame layout does not save registers using moves.  */
+static sbitmap
+ix86_get_separate_components (void)
+{
+  /* Finalize the stack frame flags first; save_regs_using_mov is read
+     right afterwards.  */
+  ix86_finalize_stack_frame_flags ();
+
+  const struct ix86_frame &layout = cfun->machine->frame;
+  if (!layout.save_regs_using_mov)
+    return NULL;
+
+  sbitmap separable = sbitmap_alloc (NCOMPONENTS);
+  bitmap_clear (separable);
+
+  unsigned regno = 0;
+  while (regno < FIRST_PSEUDO_REGISTER)
+    {
+      if (ix86_save_reg (regno, true, true))
+       bitmap_set_bit (separable, regno);
+      regno++;
+    }
+
+  if (separate_frame_alloc_p ())
+    bitmap_set_bit (separable, SW_FRAME);
+
+  return separable;
+}
+
+/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  Return a newly
+   allocated bitmap of the components basic block BB needs: each saved
+   hard register that is in BB's DF_LIVE in, gen or kill set, and
+   SW_FRAME if any such register was found or, where the frame may be
+   allocated separately, if some non-debug insn of BB satisfies
+   requires_stack_frame_p.  */
+static sbitmap
+ix86_components_for_bb (basic_block bb)
+{
+  sbitmap needed = sbitmap_alloc (NCOMPONENTS);
+  bitmap_clear (needed);
+
+  bitmap live_in = DF_LIVE_IN (bb);
+  bitmap live_gen = &DF_LIVE_BB_INFO (bb)->gen;
+  bitmap live_kill = &DF_LIVE_BB_INFO (bb)->kill;
+  bool wants_frame = false;
+
+  for (unsigned regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    {
+      if (!ix86_save_reg (regno, true, true))
+       continue;
+      if (!bitmap_bit_p (live_in, regno)
+         && !bitmap_bit_p (live_gen, regno)
+         && !bitmap_bit_p (live_kill, regno))
+       continue;
+
+      bitmap_set_bit (needed, regno);
+      /* XXX we don't really need a frame for saving registers,
+        we sometimes can use the red-zone.  */
+      wants_frame = true;
+    }
+
+  /* No saved register touched: the block may still need the frame
+     itself.  Only scan the insns when the frame is a separable
+     component at all.  */
+  if (!wants_frame && separate_frame_alloc_p ())
+    {
+      HARD_REG_SET prologue_used, set_up_by_prologue;
+
+      CLEAR_HARD_REG_SET (prologue_used);
+      CLEAR_HARD_REG_SET (set_up_by_prologue);
+      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
+      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
+      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
+                          HARD_FRAME_POINTER_REGNUM);
+
+      rtx_insn *insn;
+      FOR_BB_INSNS (bb, insn)
+       if (NONDEBUG_INSN_P (insn)
+           && requires_stack_frame_p (insn, prologue_used,
+                                      set_up_by_prologue))
+         {
+           wants_frame = true;
+           break;
+         }
+    }
+
+  if (wants_frame)
+    bitmap_set_bit (needed, SW_FRAME);
+
+  return needed;
+}
+
+/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  The body is
+   empty: all four arguments are ignored and no component is removed
+   here.  */
+static void
+ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+}
+
+/* Reset cfun->machine->fs to the state before the prologue: every
+   field is zeroed, then the CFA is set to the stack pointer at
+   INCOMING_FRAME_SP_OFFSET and the stack pointer is marked valid and
+   not realigned.  */
+static void
+ix86_init_frame_state (void)
+{
+  struct machine_frame_state &fs = cfun->machine->fs;
+
+  memset (&fs, 0, sizeof (fs));
+
+  /* CFA state for before the prologue.  */
+  fs.cfa_reg = stack_pointer_rtx;
+  fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
+
+  /* SP offset to the CFA.  This keeps being tracked after the CFA
+     register has been swapped away from SP.  In the case of
+     re-alignment this is fudged; what matters are offsets within the
+     local frame.  */
+  fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
+  fs.sp_valid = true;
+  fs.sp_realigned = false;
+}
+
+/* Emit the stack frame allocation for the separately shrink-wrapped
+   SW_FRAME component.  The frame state is first reset to the one on
+   function entry, then the stack pointer is lowered by a single
+   adjustment; afterwards the tracked stack pointer offset must equal
+   frame.stack_pointer_offset.  Allocations that would need stack
+   probing are not implemented and abort.  */
+static void
+ix86_alloc_frame (void)
+{
+  struct machine_function *m = cfun->machine;
+  const struct ix86_frame &frame = m->frame;
+
+  /* Start from the CFA and stack pointer state before the prologue.  */
+  ix86_init_frame_state ();
+
+  HOST_WIDE_INT allocate = frame.stack_pointer_offset - m->fs.sp_offset;
+
+  /* Neither stack clash protection nor static stack checking is
+     implemented for the separate allocation.  Both depend on user
+     options, so fail hard instead of marking this unreachable.  */
+  if (allocate >= 0
+      && ((flag_stack_clash_protection && !ix86_target_stack_probe ())
+         || flag_stack_check == STATIC_BUILTIN_STACK_CHECK))
+    abort ();
+
+  if (allocate != 0)
+    {
+      if (!ix86_target_stack_probe ()
+         || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
+       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                                  GEN_INT (-allocate), -1,
+                                  m->fs.cfa_reg == stack_pointer_rtx);
+      else
+       /* Allocating through the stack probe worker is not
+          implemented.  */
+       abort ();
+    }
+
+  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
+}
+
+/* Emit the stack frame deallocation for the separately shrink-wrapped
+   SW_FRAME component: if the tracked stack pointer offset is not
+   UNITS_PER_WORD, adjust the stack pointer by the difference.  The
+   frame state in cfun->machine->fs is copied on entry and written back
+   before returning, so the tracked state is the same after the call
+   as before it.  */
+static void
+ix86_dealloc_frame (void)
+{
+  struct machine_function *m = cfun->machine;
+  const struct machine_frame_state saved_fs = m->fs;
+
+  /* Only a valid, non-realigned stack pointer without a valid frame
+     pointer is supported here.  */
+  gcc_assert (m->fs.sp_valid);
+  gcc_assert (!m->fs.sp_realigned);
+  gcc_assert (!m->fs.fp_valid);
+  gcc_assert (!m->fs.realigned);
+
+  const HOST_WIDE_INT excess = m->fs.sp_offset - UNITS_PER_WORD;
+  if (excess != 0)
+    /* XXX the style argument is hard-coded to -1.  */
+    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                              GEN_INT (excess), -1, true);
+
+  m->fs = saved_fs;
+}
+
+/* Emit the register saves (PROLOGUE_P) or restores (!PROLOGUE_P) for
+   the registers whose bits are set in COMPONENTS.  All saved general
+   and SSE registers are walked in register-number order so that each
+   one gets its slot: general registers count down from
+   frame.reg_save_offset in UNITS_PER_WORD steps, SSE registers from
+   frame.sse_reg_save_offset in V4SFmode-sized steps, whether or not
+   the register itself is in COMPONENTS.  Restores attach their CFA
+   restore note to the emitted insn.  SW_FRAME is not handled here.  */
+static void
+ix86_process_components (sbitmap components, bool prologue_p)
+{
+  const struct ix86_frame &frame = cfun->machine->frame;
+  HOST_WIDE_INT gpr_offset = frame.reg_save_offset;
+  HOST_WIDE_INT sse_offset = frame.sse_reg_save_offset;
+
+  for (unsigned regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    {
+      const bool is_gpr = GENERAL_REGNO_P (regno);
+
+      if (!is_gpr && !SSE_REGNO_P (regno))
+       continue;
+      if (!ix86_save_reg (regno, true, true))
+       continue;
+
+      /* Pick the slot counter and save mode of this register class.  */
+      HOST_WIDE_INT &slot = is_gpr ? gpr_offset : sse_offset;
+      machine_mode mode = word_mode;
+      if (!is_gpr)
+       mode = V4SFmode;
+
+      if (bitmap_bit_p (components, regno))
+       {
+         if (!prologue_p)
+           ix86_emit_restore_reg_using_mov (regno, slot, true);
+         else
+           ix86_emit_save_reg_using_mov (mode, regno, slot);
+       }
+
+      /* The slot is consumed even when the register is skipped.  */
+      if (is_gpr)
+       slot -= UNITS_PER_WORD;
+      else
+       slot -= GET_MODE_SIZE (V4SFmode);
+    }
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  Emit the
+   register saves selected by COMPONENTS.  If SW_FRAME is among them,
+   the frame state is reset to the one before the prologue ahead of
+   the saves, and afterwards frame_alloc_separately is set and the
+   stack frame allocation is emitted.  */
+static void
+ix86_emit_prologue_components (sbitmap components)
+{
+  const bool with_frame = bitmap_bit_p (components, SW_FRAME);
+
+  if (with_frame)
+    ix86_init_frame_state ();
+
+  ix86_process_components (components, true);
+
+  if (!with_frame)
+    return;
+
+  cfun->machine->frame_alloc_separately = true;
+  ix86_alloc_frame ();
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  Emit the
+   register restores selected by COMPONENTS and then, if SW_FRAME is
+   among them, the stack frame deallocation.  */
+static void
+ix86_emit_epilogue_components (sbitmap components)
+{
+  ix86_process_components (components, false);
+
+  if (!bitmap_bit_p (components, SW_FRAME))
+    return;
+
+  ix86_dealloc_frame ();
+}
+
+/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  Record in
+   reg_wrapped_separately each hard register whose bit is set in
+   COMPONENTS.  SW_FRAME is not recorded here; frame_alloc_separately
+   is set by ix86_emit_prologue_components instead.  */
+static void
+ix86_set_handled_components (sbitmap components)
+{
+  bool *wrapped = cfun->machine->reg_wrapped_separately;
+
+  for (unsigned regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    {
+      if (bitmap_bit_p (components, regno))
+       wrapped[regno] = true;
+    }
+}
+
+/* Install the separate shrink-wrapping target hooks.  Each #define
+   must stay on one line: a macro body ends at the newline, so a
+   wrapped line would leave the hook empty.  */
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS ix86_emit_prologue_components
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS ix86_emit_epilogue_components
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
+
 /* Extract the parts of an RTL expression that is a valid memory address
    for an instruction.  Return false if the structure of the address is
    grossly off.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index efd46a143136..ad55f5e7222e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2750,6 +2750,8 @@ struct GTY(()) machine_function {
   int varargs_gpr_size;
   int varargs_fpr_size;
   int optimize_mode_switching[MAX_386_ENTITIES];
+  bool reg_wrapped_separately[FIRST_PSEUDO_REGISTER];
+  bool frame_alloc_separately;
 
   /* Cached initial frame layout for the current function.  */
   struct ix86_frame frame;

Reply via email to