https://gcc.gnu.org/g:eb94eb73cf3993c1d544e6eb8c4dcb671f215b25
commit eb94eb73cf3993c1d544e6eb8c4dcb671f215b25 Author: Michael Matz <m...@suse.de> Date: Sun Jun 30 03:52:39 2024 +0200 x86: implement separate shrink wrapping Diff: --- gcc/config/i386/i386.cc | 581 +++++++++++++++++++++++++++++++++++++++++++----- gcc/config/i386/i386.h | 2 + 2 files changed, 533 insertions(+), 50 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4b6b665e5997..33e69e96008d 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -6970,7 +6970,7 @@ ix86_compute_frame_layout (void) } frame->save_regs_using_mov - = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue; + = (TARGET_PROLOGUE_USING_MOVE || flag_shrink_wrap_separate) && m->use_fast_prologue_epilogue; /* Skip return address and error code in exception handler. */ offset = INCOMING_FRAME_SP_OFFSET; @@ -7120,7 +7120,8 @@ ix86_compute_frame_layout (void) /* Size prologue needs to allocate. */ to_allocate = offset - frame->sse_reg_save_offset; - if ((!to_allocate && frame->nregs <= 1) + if ((!to_allocate && frame->nregs <= 1 + && !flag_shrink_wrap_separate) || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) /* If static stack checking is enabled and done with probes, the registers need to be saved before allocating the frame. 
*/ @@ -7417,6 +7418,8 @@ ix86_emit_save_regs (void) int regno; rtx_insn *insn; + gcc_assert (!crtl->shrink_wrapped_separate); + if (!TARGET_APX_PUSH2POP2 || !ix86_can_use_push2pop2 () || cfun->machine->func_type != TYPE_NORMAL) @@ -7589,7 +7592,8 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) { - ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); + if (!cfun->machine->reg_wrapped_separately[regno]) + ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); cfa_offset -= UNITS_PER_WORD; } } @@ -7604,7 +7608,8 @@ ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) { - ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); + if (!cfun->machine->reg_wrapped_separately[regno]) + ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); cfa_offset -= GET_MODE_SIZE (V4SFmode); } } @@ -9089,6 +9094,7 @@ ix86_expand_prologue (void) = frame.sse_reg_save_offset - frame.reg_save_offset; gcc_assert (int_registers_saved); + gcc_assert (!m->frame_alloc_separately); /* No need to do stack checking as the area will be immediately written. 
*/ @@ -9106,6 +9112,7 @@ ix86_expand_prologue (void) && flag_stack_clash_protection && !ix86_target_stack_probe ()) { + gcc_assert (!m->frame_alloc_separately); ix86_adjust_stack_and_probe (allocate, int_registers_saved, false); allocate = 0; } @@ -9116,6 +9123,7 @@ ix86_expand_prologue (void) { const HOST_WIDE_INT probe_interval = get_probe_interval (); + gcc_assert (!m->frame_alloc_separately); if (STACK_CHECK_MOVING_SP) { if (crtl->is_leaf @@ -9172,9 +9180,16 @@ ix86_expand_prologue (void) else if (!ix86_target_stack_probe () || frame.stack_pointer_offset < CHECK_STACK_LIMIT) { - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-allocate), -1, - m->fs.cfa_reg == stack_pointer_rtx); + if (!m->frame_alloc_separately) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-allocate), -1, + m->fs.cfa_reg == stack_pointer_rtx); + else + { + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_offset -= allocate; + m->fs.sp_offset += allocate; + } } else { @@ -9184,6 +9199,8 @@ ix86_expand_prologue (void) bool eax_live = ix86_eax_live_at_start_p (); bool r10_live = false; + gcc_assert (!m->frame_alloc_separately); + if (TARGET_64BIT) r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); @@ -9338,6 +9355,7 @@ ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p) struct machine_function *m = cfun->machine; rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p)); + gcc_assert (!m->reg_wrapped_separately[REGNO (reg)]); ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); m->fs.sp_offset -= UNITS_PER_WORD; @@ -9396,6 +9414,9 @@ ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false) const int offset = UNITS_PER_WORD * 2; rtx_insn *insn; + gcc_assert (!m->reg_wrapped_separately[REGNO (reg1)] + && !m->reg_wrapped_separately[REGNO (reg2)]); + rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode, stack_pointer_rtx)); @@ -9468,6 +9489,7 @@ ix86_emit_restore_regs_using_pop (bool ppx_p) { unsigned int regno; 
+ gcc_assert (!crtl->shrink_wrapped_separate); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p); @@ -9484,6 +9506,7 @@ ix86_emit_restore_regs_using_pop2 (void) int loaded_regnum = 0; bool aligned = cfun->machine->fs.sp_offset % 16 == 0; + gcc_assert (!crtl->shrink_wrapped_separate); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) { @@ -9551,42 +9574,72 @@ ix86_emit_leave (rtx_insn *insn) m->fs.fp_offset); } +static void +ix86_emit_restore_reg_using_mov (unsigned regno, HOST_WIDE_INT cfa_offset, + bool cfi_here) +{ + if (GENERAL_REGNO_P (regno)) + { + struct machine_function *m = cfun->machine; + rtx reg = gen_rtx_REG (word_mode, regno); + rtx mem; + rtx_insn *insn; + + mem = choose_baseaddr (cfa_offset, NULL); + mem = gen_frame_mem (word_mode, mem); + insn = emit_move_insn (reg, mem); + + if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) + { + /* Previously we'd represented the CFA as an expression + like *(%ebp - 8). We've just popped that value from + the stack, which means we need to reset the CFA to + the drap register. This will remain until we restore + the stack pointer. */ + add_reg_note (insn, REG_CFA_DEF_CFA, reg); + RTX_FRAME_RELATED_P (insn) = 1; + + /* This means that the DRAP register is valid for addressing. */ + m->fs.drap_valid = true; + } + else + ix86_add_cfa_restore_note (cfi_here ? insn : NULL, reg, cfa_offset); + } + else if (SSE_REGNO_P (regno)) + { + rtx reg = gen_rtx_REG (V4SFmode, regno); + rtx mem; + unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); + rtx_insn *insn; + + mem = choose_baseaddr (cfa_offset, &align); + mem = gen_rtx_MEM (V4SFmode, mem); + + /* The location alignment depends upon the base register. */ + align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); + gcc_assert (! 
(cfa_offset & (align / BITS_PER_UNIT - 1))); + set_mem_align (mem, align); + insn = emit_insn (gen_rtx_SET (reg, mem)); + + ix86_add_cfa_restore_note (cfi_here ? insn : NULL, reg, cfa_offset); + } + else + abort (); +} + /* Emit code to restore saved registers using MOV insns. First register is restored from CFA - CFA_OFFSET. */ static void ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, bool maybe_eh_return) { - struct machine_function *m = cfun->machine; unsigned int regno; for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) { - rtx reg = gen_rtx_REG (word_mode, regno); - rtx mem; - rtx_insn *insn; - - mem = choose_baseaddr (cfa_offset, NULL); - mem = gen_frame_mem (word_mode, mem); - insn = emit_move_insn (reg, mem); - - if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) - { - /* Previously we'd represented the CFA as an expression - like *(%ebp - 8). We've just popped that value from - the stack, which means we need to reset the CFA to - the drap register. This will remain until we restore - the stack pointer. */ - add_reg_note (insn, REG_CFA_DEF_CFA, reg); - RTX_FRAME_RELATED_P (insn) = 1; - - /* This means that the DRAP register is valid for addressing. 
*/ - m->fs.drap_valid = true; - } - else - ix86_add_cfa_restore_note (NULL, reg, cfa_offset); - + if (!cfun->machine->reg_wrapped_separately[regno]) + ix86_emit_restore_reg_using_mov (regno, cfa_offset, false); cfa_offset -= UNITS_PER_WORD; } } @@ -9602,21 +9655,8 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) { - rtx reg = gen_rtx_REG (V4SFmode, regno); - rtx mem; - unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); - - mem = choose_baseaddr (cfa_offset, &align); - mem = gen_rtx_MEM (V4SFmode, mem); - - /* The location aligment depends upon the base register. */ - align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); - gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); - set_mem_align (mem, align); - emit_insn (gen_rtx_SET (reg, mem)); - - ix86_add_cfa_restore_note (NULL, reg, cfa_offset); - + if (!cfun->machine->reg_wrapped_separately[regno]) + ix86_emit_restore_reg_using_mov (regno, cfa_offset, false); cfa_offset -= GET_MODE_SIZE (V4SFmode); } } @@ -9854,6 +9894,11 @@ ix86_expand_epilogue (int style) /* EH_RETURN requires the use of moves to function properly. */ if (crtl->calls_eh_return) restore_regs_via_mov = true; + else if (crtl->shrink_wrapped_separate) + { + gcc_assert (!TARGET_SEH); + restore_regs_via_mov = true; + } /* SEH requires the use of pops to identify the epilogue. 
*/ else if (TARGET_SEH) restore_regs_via_mov = false; @@ -9888,6 +9933,7 @@ ix86_expand_epilogue (int style) && sp_valid_at (frame.stack_realign_offset + 1) && (frame.nsseregs + frame.nregs) != 0) { + gcc_assert (!m->frame_alloc_separately); pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (m->fs.sp_offset - frame.sse_reg_save_offset), @@ -9945,6 +9991,7 @@ ix86_expand_epilogue (int style) rtx sa = EH_RETURN_STACKADJ_RTX; rtx_insn *insn; + gcc_assert (!m->frame_alloc_separately); /* Stack realignment doesn't work with eh_return. */ if (crtl->stack_realign_needed) sorry ("Stack realignment not supported with " @@ -10017,6 +10064,7 @@ ix86_expand_epilogue (int style) } else { + gcc_assert (!m->frame_alloc_separately); /* SEH requires that the function end with (1) a stack adjustment if necessary, (2) a sequence of pops, and (3) a return or jump instruction. Prevent insns from the function body from @@ -10069,6 +10117,7 @@ ix86_expand_epilogue (int style) then do so now. */ if (m->fs.fp_valid) { + gcc_assert (!m->frame_alloc_separately); /* If the stack pointer is valid and pointing at the frame pointer store address, then we only need a pop. 
*/ if (sp_valid_at (frame.hfp_save_offset) @@ -10095,6 +10144,7 @@ ix86_expand_epilogue (int style) rtx_insn *insn; gcc_assert (stack_realign_drap); + gcc_assert (!m->frame_alloc_separately); if (ix86_static_chain_on_stack) param_ptr_offset += UNITS_PER_WORD; @@ -10130,9 +10180,10 @@ ix86_expand_epilogue (int style) gcc_assert (!m->fs.realigned); if (m->fs.sp_offset != UNITS_PER_WORD) { - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), - style, true); + if (!m->frame_alloc_separately) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), + style, true); } else ix86_add_queued_cfa_restore_notes (get_last_insn ()); @@ -10730,6 +10781,436 @@ ix86_live_on_entry (bitmap regs) } } +/* Separate shrink-wrapping. */ +#define NCOMPONENTS (FIRST_PSEUDO_REGISTER + 1) +#define SW_FRAME FIRST_PSEUDO_REGISTER + +static bool +separate_frame_alloc_p (void) +{ + struct machine_function *m = cfun->machine; + if (frame_pointer_needed + || TARGET_SEH + || crtl->stack_realign_needed + || m->call_ms2sysv) + return false; + return true; +} + +static sbitmap +ix86_get_separate_components (void) +{ + struct machine_function *m = cfun->machine; + struct ix86_frame *frame = &m->frame; + sbitmap components; + + ix86_finalize_stack_frame_flags (); + if (!frame->save_regs_using_mov) + return NULL; + + components = sbitmap_alloc (NCOMPONENTS); + bitmap_clear (components); + + for (unsigned regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (ix86_save_reg (regno, true, true)) + { + bitmap_set_bit (components, regno); + } + + if (separate_frame_alloc_p ()) + bitmap_set_bit (components, SW_FRAME); + + return components; +} + +static sbitmap +ix86_components_for_bb (basic_block bb) +{ + bool need_frame = false; + sbitmap components = sbitmap_alloc (NCOMPONENTS); + bitmap_clear (components); + + bitmap in = DF_LIVE_IN (bb); + bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; + bitmap 
kill = &DF_LIVE_BB_INFO (bb)->kill; + + for (unsigned regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (ix86_save_reg (regno, true, true) + && (bitmap_bit_p (in, regno) + || bitmap_bit_p (gen, regno) + || bitmap_bit_p (kill, regno))) + { + bitmap_set_bit (components, regno); + /* XXX we don't really need a frame for saving registers, + we sometimes can use the red-zone. */ + need_frame = true; + } + + if (!need_frame && separate_frame_alloc_p ()) + { + HARD_REG_SET set_up_by_prologue, prologue_used; + rtx_insn *insn; + + CLEAR_HARD_REG_SET (prologue_used); + CLEAR_HARD_REG_SET (set_up_by_prologue); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, + HARD_FRAME_POINTER_REGNUM); + + FOR_BB_INSNS (bb, insn) + { + if (NONDEBUG_INSN_P (insn) + && requires_stack_frame_p (insn, prologue_used, + set_up_by_prologue)) + { + need_frame = true; + break; + + } + + } + } + if (need_frame) + bitmap_set_bit (components, SW_FRAME); + + return components; +} + +static void +ix86_disqualify_components (sbitmap, edge, sbitmap, bool) +{ +} + +static void +ix86_init_frame_state (void) +{ + struct machine_function *m = cfun->machine; + + memset (&m->fs, 0, sizeof (m->fs)); + + /* Initialize CFA state for before the prologue. */ + m->fs.cfa_reg = stack_pointer_rtx; + m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; + + /* Track SP offset to the CFA. We continue tracking this after we've + swapped the CFA register away from SP. In the case of re-alignment + this is fudged; we're interested in offsets within the local frame. 
*/ + m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; + m->fs.sp_valid = true; + m->fs.sp_realigned = false; +} + +static void +ix86_alloc_frame (void) +{ + struct machine_function *m = cfun->machine; + const struct ix86_frame &frame = m->frame; + rtx insn, t; + bool int_registers_saved = true; + bool sse_registers_saved = true; + HOST_WIDE_INT allocate; + + memset (&m->fs, 0, sizeof (m->fs)); + + /* Initialize CFA state for before the prologue. */ + m->fs.cfa_reg = stack_pointer_rtx; + m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; + + /* Track SP offset to the CFA. We continue tracking this after we've + swapped the CFA register away from SP. In the case of re-alignment + this is fudged; we're interested in offsets within the local frame. */ + m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; + m->fs.sp_valid = true; + m->fs.sp_realigned = false; + + allocate = frame.stack_pointer_offset - m->fs.sp_offset; + + /* On SEH target with very large frame size, allocate an area to save + SSE registers (as the very large allocation won't be described). */ + if (TARGET_SEH + && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE + && !sse_registers_saved) + { + abort(); + } + + /* If stack clash protection is requested, then probe the stack, unless it + is already probed on the target. */ + if (allocate >= 0 + && flag_stack_clash_protection + && !ix86_target_stack_probe ()) + { + abort(); + ix86_adjust_stack_and_probe (allocate, int_registers_saved, false); + allocate = 0; + } + + /* The stack has already been decremented by the instruction calling us + so probe if the size is non-negative to preserve the protection area. 
*/ + else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) + { + const HOST_WIDE_INT probe_interval = get_probe_interval (); + + abort(); + if (STACK_CHECK_MOVING_SP) + { + if (crtl->is_leaf + && !cfun->calls_alloca + && allocate <= probe_interval) + ; + + else + { + ix86_adjust_stack_and_probe (allocate, int_registers_saved, true); + allocate = 0; + } + } + + else + { + HOST_WIDE_INT size = allocate; + + if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) + size = 0x80000000 - get_stack_check_protect () - 1; + + if (TARGET_STACK_PROBE) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { + if (size > probe_interval) + ix86_emit_probe_stack_range (0, size, int_registers_saved); + } + else + ix86_emit_probe_stack_range (0, + size + get_stack_check_protect (), + int_registers_saved); + } + else + { + if (crtl->is_leaf && !cfun->calls_alloca) + { + if (size > probe_interval + && size > get_stack_check_protect ()) + ix86_emit_probe_stack_range (get_stack_check_protect (), + (size + - get_stack_check_protect ()), + int_registers_saved); + } + else + ix86_emit_probe_stack_range (get_stack_check_protect (), size, + int_registers_saved); + } + } + } + + if (allocate == 0) + ; + else if (!ix86_target_stack_probe () + || frame.stack_pointer_offset < CHECK_STACK_LIMIT) + { + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-allocate), -1, + m->fs.cfa_reg == stack_pointer_rtx); + } + else + { + abort(); + rtx eax = gen_rtx_REG (Pmode, AX_REG); + rtx r10 = NULL; + const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); + bool eax_live = ix86_eax_live_at_start_p (); + bool r10_live = false; + + if (TARGET_64BIT) + r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); + + if (eax_live) + { + insn = emit_insn (gen_push (eax)); + allocate -= UNITS_PER_WORD; + /* Note that SEH directives need to continue tracking the stack + pointer even after the frame pointer has been set up. 
*/ + if (sp_is_cfa_reg || TARGET_SEH) + { + if (sp_is_cfa_reg) + m->fs.cfa_offset += UNITS_PER_WORD; + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -UNITS_PER_WORD))); + } + } + + if (r10_live) + { + r10 = gen_rtx_REG (Pmode, R10_REG); + insn = emit_insn (gen_push (r10)); + allocate -= UNITS_PER_WORD; + if (sp_is_cfa_reg || TARGET_SEH) + { + if (sp_is_cfa_reg) + m->fs.cfa_offset += UNITS_PER_WORD; + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -UNITS_PER_WORD))); + } + } + + emit_move_insn (eax, GEN_INT (allocate)); + emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax)); + + /* Use the fact that AX still contains ALLOCATE. */ + insn = emit_insn (gen_pro_epilogue_adjust_stack_sub + (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax)); + + if (sp_is_cfa_reg || TARGET_SEH) + { + if (sp_is_cfa_reg) + m->fs.cfa_offset += allocate; + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -allocate))); + } + m->fs.sp_offset += allocate; + + /* Use stack_pointer_rtx for relative addressing so that code works for + realigned stack. But this means that we need a blockage to prevent + stores based on the frame pointer from being scheduled before. */ + if (r10_live && eax_live) + { + t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); + emit_move_insn (gen_rtx_REG (word_mode, R10_REG), + gen_frame_mem (word_mode, t)); + t = plus_constant (Pmode, t, UNITS_PER_WORD); + emit_move_insn (gen_rtx_REG (word_mode, AX_REG), + gen_frame_mem (word_mode, t)); + emit_insn (gen_memory_blockage ()); + } + else if (eax_live || r10_live) + { + t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); + emit_move_insn (gen_rtx_REG (word_mode, + (eax_live ? 
AX_REG : R10_REG)), + gen_frame_mem (word_mode, t)); + emit_insn (gen_memory_blockage ()); + } + } + gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); +} + +static void +ix86_dealloc_frame (void) +{ + struct machine_function *m = cfun->machine; + struct machine_frame_state frame_state_save = m->fs; + + /* At this point the stack pointer must be valid, and we must have + restored all of the registers. We may not have deallocated the + entire stack frame. We've delayed this until now because it may + be possible to merge the local stack deallocation with the + deallocation forced by ix86_static_chain_on_stack. */ + gcc_assert (m->fs.sp_valid); + gcc_assert (!m->fs.sp_realigned); + gcc_assert (!m->fs.fp_valid); + gcc_assert (!m->fs.realigned); + if (m->fs.sp_offset != UNITS_PER_WORD) + { + int style = -1; // XXX + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), + style, true); + } + + m->fs = frame_state_save; +} + +static void +ix86_process_components (sbitmap components, bool prologue_p) +{ + struct machine_function *m = cfun->machine; + struct ix86_frame *frame = &m->frame; + HOST_WIDE_INT cfa_offset = frame->reg_save_offset; + HOST_WIDE_INT sse_cfa_offset = frame->sse_reg_save_offset; + + for (unsigned regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + if (bitmap_bit_p (components, regno)) + { + if (prologue_p) + ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); + else + ix86_emit_restore_reg_using_mov (regno, cfa_offset, true); + } + cfa_offset -= UNITS_PER_WORD; + } + else if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + if (bitmap_bit_p (components, regno)) + { + if (prologue_p) + ix86_emit_save_reg_using_mov (V4SFmode, regno, sse_cfa_offset); + else + ix86_emit_restore_reg_using_mov (regno, sse_cfa_offset, true); + } + sse_cfa_offset -= GET_MODE_SIZE (V4SFmode); + } +} + +static void 
+ix86_emit_prologue_components (sbitmap components) +{ + if (bitmap_bit_p (components, SW_FRAME)) + ix86_init_frame_state (); + + ix86_process_components (components, true); + + if (bitmap_bit_p (components, SW_FRAME)) + { + cfun->machine->frame_alloc_separately = true; + ix86_alloc_frame (); + } +} + +static void +ix86_emit_epilogue_components (sbitmap components) +{ + ix86_process_components (components, false); + if (bitmap_bit_p (components, SW_FRAME)) + ix86_dealloc_frame (); +} + +static void +ix86_set_handled_components (sbitmap components) +{ + for (unsigned regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (bitmap_bit_p (components, regno)) + cfun->machine->reg_wrapped_separately[regno] = true; + /*if (bitmap_bit_p (components, SW_FRAME)) + cfun->machine->frame_alloc_separately = true;*/ +} + +#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS +#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components +#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB +#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb +#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS +#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components +#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS ix86_emit_prologue_components +#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS ix86_emit_epilogue_components +#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS +#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components + /* Extract the parts of an RTL expression that is a valid memory address for an instruction. Return false if the structure of the address is grossly off. 
*/ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index efd46a143136..ad55f5e7222e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2750,6 +2750,8 @@ struct GTY(()) machine_function { int varargs_gpr_size; int varargs_fpr_size; int optimize_mode_switching[MAX_386_ENTITIES]; + bool reg_wrapped_separately[FIRST_PSEUDO_REGISTER]; + bool frame_alloc_separately; /* Cached initial frame layout for the current function. */ struct ix86_frame frame;