On 11/06/2015 04:52 PM, Sebastian Pop wrote:
opinion). If you want a half-finished redzone allocator, I can send you a
patch.
Yes please. Let's get it to work.
Here you go. This is incomplete and does not compile, but it shows the
direction I have in mind and isn't too far off. I had a similar patch
once for a machine that had two stack pointers (don't ask), and I
started to recreate that for the given problem last week.
The temp slot handling code in function.c needs more frame arguments,
but I got halfway through them and started wondering whether they should
be member functions of struct frame_info instead.
The bits in cfgexpand.c and function.c, once complete, are essentially all
that's necessary to support a second frame, but for this to work as a
redzone allocator it needs to be integrated with the target (i.e. i386)
frame layout code. For optimization purposes we may also want to
establish a maximum frame size for the rz_frame.
Bonus points if reload/lra use this for spilled pseudos that don't live
across calls, but I can have a go at that if you don't feel like
tackling it.
Bernd
diff --git a/gcc/caller-save.c b/gcc/caller-save.c
index 084d079..c3a5256 100644
--- a/gcc/caller-save.c
+++ b/gcc/caller-save.c
@@ -654,7 +654,7 @@ setup_save_areas (void)
{
saved_reg->slot
= assign_stack_local_1
- (regno_save_mode[regno][1],
+ (&crtl->frame, regno_save_mode[regno][1],
GET_MODE_SIZE (regno_save_mode[regno][1]), 0,
ASLK_REDUCE_ALIGN);
if (dump_file != NULL)
@@ -712,7 +712,8 @@ setup_save_areas (void)
when we restore and save the hard register in
insert_restore and insert_save. */
regno_save_mem[i][j]
- = assign_stack_local_1 (regno_save_mode[i][j],
+ = assign_stack_local_1 (&crtl->frame,
+ regno_save_mode[i][j],
GET_MODE_SIZE (regno_save_mode[i][j]),
0, ASLK_REDUCE_ALIGN);
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index bfbc958..8825217 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -335,11 +335,6 @@ static bitmap_obstack stack_var_bitmap_obstack;
is non-decreasing. */
static size_t *stack_vars_sorted;
-/* The phase of the stack frame. This is the known misalignment of
- virtual_stack_vars_rtx from PREFERRED_STACK_BOUNDARY. That is,
- (frame_offset+frame_phase) % PREFERRED_STACK_BOUNDARY == 0. */
-static int frame_phase;
-
/* Used during expand_used_vars to remember if we saw any decls for
which we'd like to enable stack smashing protection. */
static bool has_protected_decls;
@@ -375,32 +370,34 @@ align_base (HOST_WIDE_INT base, unsigned HOST_WIDE_INT align, bool align_up)
return align_up ? (base + align - 1) & -align : base & -align;
}
-/* Allocate SIZE bytes at byte alignment ALIGN from the stack frame.
+/* Allocate SIZE bytes at byte alignment ALIGN from the stack frame FRAME.
Return the frame offset. */
static HOST_WIDE_INT
-alloc_stack_frame_space (HOST_WIDE_INT size, unsigned HOST_WIDE_INT align)
+alloc_stack_frame_space (frame_info *frame, HOST_WIDE_INT size,
+ unsigned HOST_WIDE_INT align)
{
HOST_WIDE_INT offset, new_frame_offset;
- if (FRAME_GROWS_DOWNWARD)
+ if (frame->grows_downward)
{
- new_frame_offset
- = align_base (frame_offset - frame_phase - size,
- align, false) + frame_phase;
+ new_frame_offset = align_base (frame->frame_offset - frame->phase - size,
+ align, false);
+ new_frame_offset += frame->phase;
offset = new_frame_offset;
}
else
{
- new_frame_offset
- = align_base (frame_offset - frame_phase, align, true) + frame_phase;
+ new_frame_offset = align_base (frame->frame_offset - frame->phase,
+ align, true);
+ new_frame_offset += frame->phase;
offset = new_frame_offset;
new_frame_offset += size;
}
- frame_offset = new_frame_offset;
+ frame->frame_offset = new_frame_offset;
- if (frame_offset_overflow (frame_offset, cfun->decl))
- frame_offset = offset = 0;
+ if (frame_offset_overflow (frame->frame_offset, cfun->decl))
+ frame->frame_offset = offset = 0;
return offset;
}
@@ -965,11 +962,11 @@ dump_stack_var_partition (void)
}
}
-/* Assign rtl to DECL at BASE + OFFSET. */
+/* Assign rtl to DECL at BASE + OFFSET in frame FRAME. */
static void
-expand_one_stack_var_at (tree decl, rtx base, unsigned base_align,
- HOST_WIDE_INT offset)
+expand_one_stack_var_at (frame_info *frame, tree decl, rtx base,
+ unsigned base_align, HOST_WIDE_INT offset)
{
unsigned align;
rtx x;
@@ -988,7 +985,7 @@ expand_one_stack_var_at (tree decl, rtx base, unsigned base_align,
If it is we generate stack slots only accidentally so it isn't as
important, we'll simply use the alignment that is already set. */
if (base == virtual_stack_vars_rtx)
- offset -= frame_phase;
+ offset -= frame->phase;
align = offset & -offset;
align *= BITS_PER_UNIT;
if (align == 0 || align > base_align)
@@ -1120,7 +1117,7 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
FRAME_GROWS_DOWNWARD);
tree repr_decl = NULL_TREE;
offset
- = alloc_stack_frame_space (stack_vars[i].size
+ = alloc_stack_frame_space (&crtl->frame, stack_vars[i].size
+ ASAN_RED_ZONE_SIZE,
MAX (alignb, ASAN_RED_ZONE_SIZE));
@@ -1157,7 +1154,8 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
}
else
{
- offset = alloc_stack_frame_space (stack_vars[i].size, alignb);
+ offset = alloc_stack_frame_space (&crtl->frame,
+ stack_vars[i].size, alignb);
base_align = crtl->max_used_stack_slot_alignment;
}
}
@@ -1181,9 +1179,8 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
partition. */
for (j = i; j != EOC; j = stack_vars[j].next)
{
- expand_one_stack_var_at (stack_vars[j].decl,
- base, base_align,
- offset);
+ expand_one_stack_var_at (&crtl->frame, stack_vars[j].decl,
+ base, base_align, offset);
}
}
@@ -1276,9 +1273,9 @@ expand_one_stack_var_1 (tree var)
/* We handle highly aligned variables in expand_stack_vars. */
gcc_assert (byte_align * BITS_PER_UNIT <= MAX_SUPPORTED_STACK_ALIGNMENT);
- offset = alloc_stack_frame_space (size, byte_align);
+ offset = alloc_stack_frame_space (&crtl->frame, size, byte_align);
- expand_one_stack_var_at (var, virtual_stack_vars_rtx,
+ expand_one_stack_var_at (&crtl->frame, var, virtual_stack_vars_rtx,
crtl->max_used_stack_slot_alignment, offset);
}
@@ -1995,13 +1992,6 @@ expand_used_vars (void)
unsigned len;
bool gen_stack_protect_signal = false;
- /* Compute the phase of the stack frame for this function. */
- {
- int align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
- int off = STARTING_FRAME_OFFSET % align;
- frame_phase = off ? align - off : 0;
- }
-
/* Set TREE_USED on all variables in the local_decls. */
FOR_EACH_LOCAL_DECL (cfun, i, var)
TREE_USED (var) = 1;
@@ -2197,12 +2187,13 @@ expand_used_vars (void)
redzonesz = ((sz + ASAN_RED_ZONE_SIZE + data.asan_alignb - 1)
& ~(data.asan_alignb - HOST_WIDE_INT_1)) - sz;
offset
- = alloc_stack_frame_space (redzonesz, ASAN_RED_ZONE_SIZE);
+ = alloc_stack_frame_space (&crtl->frame, redzonesz, ASAN_RED_ZONE_SIZE);
data.asan_vec.safe_push (prev_offset);
data.asan_vec.safe_push (offset);
/* Leave space for alignment if STRICT_ALIGNMENT. */
if (STRICT_ALIGNMENT)
- alloc_stack_frame_space ((GET_MODE_ALIGNMENT (SImode)
+ alloc_stack_frame_space (&crtl->frame,
+ (GET_MODE_ALIGNMENT (SImode)
<< ASAN_SHADOW_SHIFT)
/ BITS_PER_UNIT, 1);
diff --git a/gcc/emit-rtl.h b/gcc/emit-rtl.h
index f52c335..8f90fd0 100644
--- a/gcc/emit-rtl.h
+++ b/gcc/emit-rtl.h
@@ -23,7 +23,7 @@ along with GCC; see the file COPYING3. If not see
struct temp_slot;
typedef struct temp_slot *temp_slot_p;
-/* Information mainlined about RTL representation of incoming arguments. */
+/* Information maintained about RTL representation of incoming arguments. */
struct GTY(()) incoming_args {
/* Number of bytes of args popped by function being compiled on its return.
Zero if no bytes are to be popped.
@@ -52,6 +52,33 @@ struct GTY(()) incoming_args {
rtx internal_arg_pointer;
};
+/* Information maintained about layout and size of a stack frame. */
+struct GTY(()) frame_info {
+ /* Offset to end of allocated area of stack frame.
+ If stack grows down, this is the address of the last stack slot allocated.
+ If stack grows up, this is the address for the next slot. */
+ HOST_WIDE_INT frame_offset;
+
+ /* List of all used temporaries allocated, by level. */
+ vec<temp_slot_p, va_gc> *used_temp_slots;
+
+ /* List of available temp slots. */
+ struct temp_slot *avail_temp_slots;
+
+ /* The phase of the frame offset. */
+ int phase;
+
+ /* List of empty areas in the stack frame. */
+ struct frame_space *frame_space_list;
+
+ /* The base register to be used for this frame. */
+ rtx base;
+
+ /* True if FRAME_GROWS_DOWNWARD (or any similar definition)
+ applies to this particular frame. */
+ bool grows_downward;
+};
+
/* Datastructures maintained for currently processed function in RTL form. */
struct GTY(()) rtl_data {
@@ -106,12 +133,15 @@ struct GTY(()) rtl_data {
Made for the sake of unshare_all_rtl. */
rtx_expr_list *x_stack_slot_list;
- /* List of empty areas in the stack frame. */
- struct frame_space *frame_space_list;
-
/* Place after which to insert the tail_recursion_label if we need one. */
rtx_note *x_stack_check_probe_note;
+ /* Description of the regular stack frame. */
+ frame_info frame;
+
+ /* Description of the redzone stack frame. */
+ frame_info rz_frame;
+
/* Location at which to save the argument pointer if it will need to be
referenced. There are two cases where this is done: if nonlocal gotos
exist, or if vars stored at an offset from the argument pointer will be
@@ -121,23 +151,9 @@ struct GTY(()) rtl_data {
/* Dynamic Realign Argument Pointer used for realigning stack. */
rtx drap_reg;
- /* Offset to end of allocated area of stack frame.
- If stack grows down, this is the address of the last stack slot allocated.
- If stack grows up, this is the address for the next slot. */
- HOST_WIDE_INT x_frame_offset;
-
/* Insn after which register parms and SAVE_EXPRs are born, if nonopt. */
rtx_insn *x_parm_birth_insn;
- /* List of all used temporaries allocated, by level. */
- vec<temp_slot_p, va_gc> *x_used_temp_slots;
-
- /* List of available temp slots. */
- struct temp_slot *x_avail_temp_slots;
-
- /* Current nesting level for temporaries. */
- int x_temp_slot_level;
-
/* The largest alignment needed on the stack, including requirement
for outgoing stack alignment. */
unsigned int stack_alignment_needed;
@@ -290,11 +306,8 @@ struct GTY(()) rtl_data {
#define naked_return_label (crtl->x_naked_return_label)
#define stack_slot_list (crtl->x_stack_slot_list)
#define parm_birth_insn (crtl->x_parm_birth_insn)
-#define frame_offset (crtl->x_frame_offset)
#define stack_check_probe_note (crtl->x_stack_check_probe_note)
#define arg_pointer_save_area (crtl->x_arg_pointer_save_area)
-#define used_temp_slots (crtl->x_used_temp_slots)
-#define avail_temp_slots (crtl->x_avail_temp_slots)
#define temp_slot_level (crtl->x_temp_slot_level)
#define nonlocal_goto_handler_labels (crtl->x_nonlocal_goto_handler_labels)
#define frame_pointer_needed (crtl->frame_pointer_needed)
diff --git a/gcc/function.c b/gcc/function.c
index a637cb3..9aec731 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -217,10 +217,10 @@ free_after_compilation (struct function *f)
HOST_WIDE_INT
get_frame_size (void)
{
- if (FRAME_GROWS_DOWNWARD)
- return -frame_offset;
+ if (crtl->frame.grows_downward)
+ return -crtl->frame.frame_offset;
else
- return frame_offset;
+ return crtl->frame.frame_offset;
}
/* Issue an error message and return TRUE if frame OFFSET overflows in
@@ -272,19 +272,14 @@ get_stack_local_alignment (tree type, machine_mode mode)
given a start/length pair that lies at the end of the frame. */
static bool
-try_fit_stack_local (HOST_WIDE_INT start, HOST_WIDE_INT length,
+try_fit_stack_local (frame_info *frame,
+ HOST_WIDE_INT start, HOST_WIDE_INT length,
HOST_WIDE_INT size, unsigned int alignment,
HOST_WIDE_INT *poffset)
{
HOST_WIDE_INT this_frame_offset;
int frame_off, frame_alignment, frame_phase;
- /* Calculate how many bytes the start of local variables is off from
- stack alignment. */
- frame_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
- frame_off = STARTING_FRAME_OFFSET % frame_alignment;
- frame_phase = frame_off ? frame_alignment - frame_off : 0;
-
/* Round the frame offset to the specified alignment. */
/* We must be careful here, since FRAME_OFFSET might be negative and
@@ -293,14 +288,14 @@ try_fit_stack_local (HOST_WIDE_INT start, HOST_WIDE_INT length,
use logical operations which are unambiguous. */
if (FRAME_GROWS_DOWNWARD)
this_frame_offset
- = (FLOOR_ROUND (start + length - size - frame_phase,
+ = (FLOOR_ROUND (start + length - size - frame->phase,
(unsigned HOST_WIDE_INT) alignment)
- + frame_phase);
+ + frame->phase);
else
this_frame_offset
- = (CEIL_ROUND (start - frame_phase,
+ = (CEIL_ROUND (start - frame->phase,
(unsigned HOST_WIDE_INT) alignment)
- + frame_phase);
+ + frame->phase);
/* See if it fits. If this space is at the edge of the frame,
consider extending the frame to make it fit. Our caller relies on
@@ -324,17 +319,17 @@ try_fit_stack_local (HOST_WIDE_INT start, HOST_WIDE_INT length,
function's frame_space_list. */
static void
-add_frame_space (HOST_WIDE_INT start, HOST_WIDE_INT end)
+add_frame_space (frame_info *frame, HOST_WIDE_INT start, HOST_WIDE_INT end)
{
struct frame_space *space = ggc_alloc<frame_space> ();
- space->next = crtl->frame_space_list;
- crtl->frame_space_list = space;
+ space->next = frame->frame_space_list;
+ frame->frame_space_list = space;
space->start = start;
space->length = end - start;
}
-/* Allocate a stack slot of SIZE bytes and return a MEM rtx for it
- with machine mode MODE.
+/* Allocate a stack slot of SIZE bytes in FRAME and return a MEM rtx
+ for it with machine mode MODE.
ALIGN controls the amount of alignment for the address of the slot:
0 means according to MODE,
@@ -351,12 +346,13 @@ add_frame_space (HOST_WIDE_INT start, HOST_WIDE_INT end)
We do not round to stack_boundary here. */
rtx
-assign_stack_local_1 (machine_mode mode, HOST_WIDE_INT size,
- int align, int kind)
+assign_stack_local_1 (frame_info *frame, machine_mode mode,
+ HOST_WIDE_INT size, int align, int kind)
{
rtx x, addr;
int bigend_correction = 0;
HOST_WIDE_INT slot_offset = 0, old_frame_offset;
+ HOST_WIDE_INT frame_offset = frame->frame_offset;
unsigned int alignment, alignment_in_bits;
if (align == 0)
@@ -424,17 +420,17 @@ assign_stack_local_1 (machine_mode mode, HOST_WIDE_INT size,
{
struct frame_space **psp;
- for (psp = &crtl->frame_space_list; *psp; psp = &(*psp)->next)
+ for (psp = &frame->frame_space_list; *psp; psp = &(*psp)->next)
{
struct frame_space *space = *psp;
- if (!try_fit_stack_local (space->start, space->length, size,
- alignment, &slot_offset))
+ if (!try_fit_stack_local (frame, space->start, space->length,
+ size, alignment, &slot_offset))
continue;
*psp = space->next;
if (slot_offset > space->start)
- add_frame_space (space->start, slot_offset);
+ add_frame_space (frame, space->start, slot_offset);
if (slot_offset + size < space->start + space->length)
- add_frame_space (slot_offset + size,
+ add_frame_space (frame, slot_offset + size,
space->start + space->length);
goto found_space;
}
@@ -450,28 +446,30 @@ assign_stack_local_1 (machine_mode mode, HOST_WIDE_INT size,
if (FRAME_GROWS_DOWNWARD)
{
- frame_offset -= size;
- try_fit_stack_local (frame_offset, size, size, alignment, &slot_offset);
+ frame->x_frame_offset -= size;
+ try_fit_stack_local (frame, frame_offset, size, size, alignment,
+ &slot_offset);
if (kind & ASLK_RECORD_PAD)
{
if (slot_offset > frame_offset)
- add_frame_space (frame_offset, slot_offset);
+ add_frame_space (frame, frame_offset, slot_offset);
if (slot_offset + size < old_frame_offset)
- add_frame_space (slot_offset + size, old_frame_offset);
+ add_frame_space (frame, slot_offset + size, old_frame_offset);
}
}
else
{
- frame_offset += size;
- try_fit_stack_local (old_frame_offset, size, size, alignment, &slot_offset);
+ frame->x_frame_offset += size;
+ try_fit_stack_local (frame, old_frame_offset, size, size, alignment,
+ &slot_offset);
if (kind & ASLK_RECORD_PAD)
{
if (slot_offset > old_frame_offset)
- add_frame_space (old_frame_offset, slot_offset);
+ add_frame_space (frame, old_frame_offset, slot_offset);
if (slot_offset + size < frame_offset)
- add_frame_space (slot_offset + size, frame_offset);
+ add_frame_space (frame, slot_offset + size, frame_offset);
}
}
@@ -481,10 +479,11 @@ assign_stack_local_1 (machine_mode mode, HOST_WIDE_INT size,
if (BYTES_BIG_ENDIAN && mode != BLKmode && GET_MODE_SIZE (mode) < size)
bigend_correction = size - GET_MODE_SIZE (mode);
+ rtx reg = frame->base;
/* If we have already instantiated virtual registers, return the actual
address relative to the frame pointer. */
- if (virtuals_instantiated)
- addr = plus_constant (Pmode, frame_pointer_rtx,
+ if (reg != frame_pointer_rtx || virtuals_instantiated)
+ addr = plus_constant (Pmode, reg,
trunc_int_for_mode
(slot_offset + bigend_correction
+ STARTING_FRAME_OFFSET, Pmode));
@@ -504,6 +503,7 @@ assign_stack_local_1 (machine_mode mode, HOST_WIDE_INT size,
if (frame_offset_overflow (frame_offset, current_function_decl))
frame_offset = 0;
+ frame->frame_offset = frame_offset;
return x;
}
@@ -573,7 +573,6 @@ struct temp_address_hasher : ggc_ptr_hash<temp_slot_address_entry>
/* A table of addresses that represent a stack slot. The table is a mapping
from address RTXen to a temp slot. */
static GTY(()) hash_table<temp_address_hasher> *temp_slot_address_table;
-static size_t n_temp_slots_in_use;
/* Removes temporary slot TEMP from LIST. */
@@ -602,35 +601,35 @@ insert_slot_to_list (struct temp_slot *temp, struct temp_slot **list)
*list = temp;
}
-/* Returns the list of used temp slots at LEVEL. */
+/* Returns the list of used temp slots at LEVEL in frame FRAME. */
static struct temp_slot **
-temp_slots_at_level (int level)
+temp_slots_at_level (frame_info *frame, int level)
{
- if (level >= (int) vec_safe_length (used_temp_slots))
- vec_safe_grow_cleared (used_temp_slots, level + 1);
+ if (level >= (int) vec_safe_length (frame->used_temp_slots))
+ vec_safe_grow_cleared (frame->used_temp_slots, level + 1);
- return &(*used_temp_slots)[level];
+ return &(*frame->used_temp_slots)[level];
}
-/* Returns the maximal temporary slot level. */
+/* Returns the maximal temporary slot level in frame FRAME. */
static int
-max_slot_level (void)
+max_slot_level (frame_info *frame)
{
- if (!used_temp_slots)
+ if (!frame->used_temp_slots)
return -1;
- return used_temp_slots->length () - 1;
+ return frame->used_temp_slots->length () - 1;
}
-/* Moves temporary slot TEMP to LEVEL. */
+/* Moves temporary slot TEMP to LEVEL in frame FRAME. */
static void
-move_slot_to_level (struct temp_slot *temp, int level)
+move_slot_to_level (frame_info *frame, struct temp_slot *temp, int level)
{
- cut_slot_from_list (temp, temp_slots_at_level (temp->level));
- insert_slot_to_list (temp, temp_slots_at_level (level));
+ cut_slot_from_list (temp, temp_slots_at_level (frame, temp->level));
+ insert_slot_to_list (temp, temp_slots_at_level (frame, level));
temp->level = level;
}
@@ -1196,16 +1195,40 @@ pop_temp_slots (void)
temp_slot_level--;
}
+/* Initialize temporary slots for frame FRAME. DOWNWARD is the value of
+ the applicable FRAME_GROWS_DOWNWARD setting. BASE is the base register
+ for this frame. */
+
+void
+init_temp_slots_frame (frame_info *frame, rtx base, bool downward)
+{
+ /* We have not allocated any temporaries yet. */
+ frame->avail_temp_slots = 0;
+ vec_alloc (frame->used_temp_slots, 0);
+ frame->temp_slot_level = 0;
+ frame->n_temp_slots_in_use = 0;
+ frame->grows_downward = downward;
+ frame->base = base;
+}
+
/* Initialize temporary slots. */
void
init_temp_slots (void)
{
- /* We have not allocated any temporaries yet. */
- avail_temp_slots = 0;
- vec_alloc (used_temp_slots, 0);
- temp_slot_level = 0;
- n_temp_slots_in_use = 0;
+ init_temp_slots_frame (&crtl->frame, frame_pointer_rtx,
+ FRAME_GROWS_DOWNWARD);
+ init_temp_slots_frame (&crtl->rz_frame, stack_pointer_rtx, false);
+
+ /* Calculate how many bytes the start of local variables is off from
+ stack alignment. This is only used for the regular frame, if it
+ becomes necessary to do something for the redzone frame, we should
+ add the necessary starting offset macro. */
+ int align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+ int off = STARTING_FRAME_OFFSET % align;
+
+ crtl->frame.phase = off ? align - off : 0;
+ crtl->rz_frame.phase = 0;
/* Set up the table to map addresses to temp slots. */
if (! temp_slot_address_table)
diff --git a/gcc/function.h b/gcc/function.h
index b2e4f71..b436f73 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -563,7 +563,8 @@ extern HOST_WIDE_INT get_frame_size (void);
return FALSE. */
extern bool frame_offset_overflow (HOST_WIDE_INT, tree);
-extern rtx assign_stack_local_1 (machine_mode, HOST_WIDE_INT, int, int);
+extern rtx assign_stack_local_1 (frame_info *, machine_mode, HOST_WIDE_INT,
+ int, int);
extern rtx assign_stack_local (machine_mode, HOST_WIDE_INT, int);
extern rtx assign_stack_temp_for_type (machine_mode, HOST_WIDE_INT, tree);
extern rtx assign_stack_temp (machine_mode, HOST_WIDE_INT);