On Mon, Oct 5, 2015 at 7:33 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
>> As shown in PR 66697 [1] and WineHQ bug [2], an application can >> misalign incoming stack to less than ABI mandated 16 bytes. While it >> is possible to use -mincoming-stack-boundary=2 (= 4 bytes) for 32 bit >> targets to emit stack realignment code, this option is artificially >> limited to 4 (= 16 bytes) for 64bit targets. > > Attached v2 patch goes all the way to enable -mstackrealign and > 'force_align_arg_pointer' attribute for x86_64. In addition to > -mincoming-stack-boundary changes, the patch changes > MIN_STACK_BOUNDARY definition to 8bytes on 64bit targets, as this is > really the minimum supported stack boundary. > > This patch is also needed to allow stack realignment in the interrupt handler. V3 adds support for unaligned SSE moves outside stack realignment area. 2015-10-06 Uros Bizjak <ubiz...@gmail.com> PR target/66697 * config/i386/i386.c (ix86_option_override_internal): Always use 8-byte minimum stack boundary in 64-bit mode. (ix86_compute_frame_layout): Remove assert on INCOMING_STACK_BOUNDARY. (ix86_emit_save_reg_using_mov): Support unaligned SSE store. Add a REG_CFA_EXPRESSION note if needed. (ix86_emit_restore_sse_regs_using_mov): Support unaligned SSE load. (ix86_handle_force_align_arg_pointer_attribute): New. (ix86_minimum_incoming_stack_boundary): Remove TARGET_64BIT check. (ix86_attribute_table): Set ix86_force_align_arg_pointer_string with ix86_handle_force_align_arg_pointer_attribute. * config/i386/i386.h (MIN_STACK_BOUNDARY): Set to BITS_PER_WORD. testsuite/ChangeLog: 2015-10-06 Uros Bizjak <ubiz...@gmail.com> PR target/66697 * gcc.target/i386/20060512-1.c: Remove ia32 requirement. (PUSH, POP): New defines. (sse2_test): Use PUSH and POP to misalign runtime stack. * gcc.target/i386/20060512-2.c: Remove ia32 requirement. Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. An earlier version of this patch was also used to compile 64bit Wine, where it reportedly fixes all unaligned stack failures for x86_64 target. Please see WineHQ bugreport. I'll wait for a reconfirmation that this slightly changed patch also works OK for Wine people, before it is committed to mainline and later backported to gcc-5 branch. Uros.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 228530) +++ config/i386/i386.c (working copy) @@ -5209,8 +5209,7 @@ ix86_option_override_internal (bool main_args_p, ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; if (opts_set->x_ix86_incoming_stack_boundary_arg) { - int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags) - ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2); + int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2; if (opts->x_ix86_incoming_stack_boundary_arg < min || opts->x_ix86_incoming_stack_boundary_arg > 12) @@ -11391,7 +11390,6 @@ ix86_compute_frame_layout (struct ix86_frame *fram /* The only ABI that has saved SSE registers (Win64) also has a 16-byte aligned default stack, and thus we don't need to be within the re-aligned local stack frame to save them. */ - gcc_assert (INCOMING_STACK_BOUNDARY >= 128); offset = ROUND_UP (offset, 16); offset += frame->nsseregs * 16; } @@ -11616,14 +11614,26 @@ ix86_emit_save_reg_using_mov (machine_mode mode, u struct machine_function *m = cfun->machine; rtx reg = gen_rtx_REG (mode, regno); rtx mem, addr, base, insn; + unsigned int align; addr = choose_baseaddr (cfa_offset); mem = gen_frame_mem (mode, addr); - /* For SSE saves, we need to indicate the 128-bit alignment. */ - set_mem_align (mem, GET_MODE_ALIGNMENT (mode)); + /* The location is aligned up to INCOMING_STACK_BOUNDARY. */ + align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY); + set_mem_align (mem, align); - insn = emit_move_insn (mem, reg); + /* SSE saves are not within re-aligned local stack frame. + In case INCOMING_STACK_BOUNDARY is misaligned, we have + to emit unaligned store. */ + if (mode == V4SFmode && align < 128) + { + rtx unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU); + insn = emit_insn (gen_rtx_SET (mem, unspec)); + } + else + insn = emit_insn (gen_rtx_SET (mem, reg)); + RTX_FRAME_RELATED_P (insn) = 1; base = addr; @@ -11670,6 +11680,8 @@ ix86_emit_save_reg_using_mov (machine_mode mode, u mem = gen_rtx_MEM (mode, addr); add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); } + else + add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); } /* Emit code to save registers using MOV insns. @@ -11886,6 +11898,25 @@ find_drap_reg (void) } } +/* Handle a "force_align_arg_pointer" attribute. */ + +static tree +ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name, + tree, int, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + /* Return minimum incoming stack alignment. */ static unsigned int @@ -11900,7 +11931,6 @@ ix86_minimum_incoming_stack_boundary (bool sibcall if -mstackrealign is used, it isn't used for sibcall check and estimated stack alignment is 128bit. */ else if (!sibcall - && !TARGET_64BIT && ix86_force_align_arg_pointer && crtl->stack_alignment_estimated == 128) incoming_stack_boundary = MIN_STACK_BOUNDARY; @@ -13184,12 +13214,27 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_IN { rtx reg = gen_rtx_REG (V4SFmode, regno); rtx mem; + unsigned int align; mem = choose_baseaddr (cfa_offset); mem = gen_rtx_MEM (V4SFmode, mem); - set_mem_align (mem, 128); - emit_move_insn (reg, mem); + /* The location is aligned up to INCOMING_STACK_BOUNDARY. */ + align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY); + set_mem_align (mem, align); + + /* SSE saves are not within re-aligned local stack frame. + In case INCOMING_STACK_BOUNDARY is misaligned, we have + to emit unaligned load. */ + if (align < 128) + { + rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem), + UNSPEC_LOADU); + emit_insn (gen_rtx_SET (reg, unspec)); + } + else + emit_insn (gen_rtx_SET (reg, mem)); + ix86_add_cfa_restore_note (NULL, reg, cfa_offset); cfa_offset -= GET_MODE_SIZE (V4SFmode); @@ -48159,7 +48204,7 @@ static const struct attribute_spec ix86_attribute_ true }, /* force_align_arg_pointer says this function realigns the stack at entry. */ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, - false, true, true, ix86_handle_cconv_attribute, false }, + false, true, true, ix86_handle_force_align_arg_pointer_attribute, false }, #if TARGET_DLLIMPORT_DECL_ATTRIBUTES { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, Index: config/i386/i386.h =================================================================== --- config/i386/i386.h (revision 228530) +++ config/i386/i386.h (working copy) @@ -755,7 +755,7 @@ extern const char *host_detect_local_cpu (int argc #define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32) /* Minimum stack boundary. */ -#define MIN_STACK_BOUNDARY (TARGET_64BIT ? (TARGET_SSE ? 128 : 64) : 32) +#define MIN_STACK_BOUNDARY BITS_PER_WORD /* Boundary (in *bits*) on which the stack pointer prefers to be aligned; the compiler cannot rely on having this alignment. */ Index: testsuite/gcc.target/i386/20060512-1.c =================================================================== --- testsuite/gcc.target/i386/20060512-1.c (revision 228530) +++ testsuite/gcc.target/i386/20060512-1.c (working copy) @@ -1,5 +1,4 @@ /* { dg-do run } */ -/* { dg-require-effective-target ia32 } */ /* { dg-options "-std=gnu99 -msse2 -mpreferred-stack-boundary=4" } */ /* { dg-require-effective-target sse2 } */ @@ -7,6 +6,14 @@ #include <emmintrin.h> +#ifdef __x86_64__ +# define PUSH "pushq %rsi" +# define POP "popq %rsi" +#else +# define PUSH "pushl %esi" +# define POP "popl %esi" +#endif + __m128i __attribute__ ((__noinline__)) vector_using_function () { @@ -27,9 +34,9 @@ static void sse2_test (void) { int result; - asm ("pushl %esi"); /* Disalign runtime stack. */ + asm (PUSH); /* Misalign runtime stack. */ result = self_aligning_function (g_1, g_2); if (result != 42) abort (); - asm ("popl %esi"); + asm (POP); } Index: testsuite/gcc.target/i386/20060512-2.c =================================================================== --- testsuite/gcc.target/i386/20060512-2.c (revision 228530) +++ testsuite/gcc.target/i386/20060512-2.c (working copy) @@ -1,5 +1,4 @@ /* { dg-do compile } */ -/* { dg-require-effective-target ia32 } */ /* { dg-options "-std=gnu99 -mpreferred-stack-boundary=4" } */ int outer_function (int x, int y)