On 12/15/18, H.J. Lu <hjl.to...@gmail.com> wrote: > On Fri, Dec 14, 2018 at 04:04:02PM -0800, H.J. Lu wrote: >> On Fri, Dec 14, 2018 at 3:24 PM Jeff Law <l...@redhat.com> wrote: >> > >> > On 12/14/18 4:01 PM, H.J. Lu wrote: >> > > On Thu, Dec 13, 2018 at 11:11 PM Uros Bizjak <ubiz...@gmail.com> >> > > wrote: >> > >> On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu...@intel.com> >> > >> wrote: >> > >>> get_frame_size () returns used stack slots during compilation, >> > >>> which >> > >>> may be optimized out later. Since >> > >>> ix86_find_max_used_stack_alignment >> > >>> is called by ix86_finalize_stack_frame_flags to check if stack >> > >>> frame >> > >>> is required, there is no need to call get_frame_size () which may >> > >>> give >> > >>> inaccurate final stack frame size. >> > >>> >> > >>> Tested on AVX512 machine configured with >> > >>> >> > >>> --with-arch=native --with-cpu=native >> > >>> >> > >>> OK for trunk? >> > >>> >> > >>> >> > >>> H.J. >> > >>> --- >> > >>> gcc/ >> > >>> >> > >>> PR target/88483 >> > >>> * config/i386/i386.c (ix86_finalize_stack_frame_flags): >> > >>> Don't >> > >>> use get_frame_size (). >> > >>> >> > >>> gcc/testsuite/ >> > >>> >> > >>> PR target/88483 >> > >>> * gcc.target/i386/stackalign/pr88483.c: New test. >> > >> LGTM, but you know this part of the compiler better than I. >> > >> >> > >> Thanks, >> > >> Uros. >> > >> >> > >>> --- >> > >>> gcc/config/i386/i386.c | 1 - >> > >>> .../gcc.target/i386/stackalign/pr88483.c | 17 >> > >>> +++++++++++++++++ >> > >>> 2 files changed, 17 insertions(+), 1 deletion(-) >> > >>> create mode 100644 >> > >>> gcc/testsuite/gcc.target/i386/stackalign/pr88483.c >> > >>> >> > >>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >> > >>> index caa701fe242..edc8f4f092e 100644 >> > >>> --- a/gcc/config/i386/i386.c >> > >>> +++ b/gcc/config/i386/i386.c >> > >>> @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void) >> > >>> && flag_exceptions >> > >>> && cfun->can_throw_non_call_exceptions) >> > >>> && !ix86_frame_pointer_required () >> > >>> - && get_frame_size () == 0 >> > >>> && ix86_nsaved_sseregs () == 0 >> > >>> && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) >> > >>> { >> > >>> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c >> > >>> b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c >> > >>> new file mode 100644 >> > >>> index 00000000000..5aec8fd4cf6 >> > >>> --- /dev/null >> > >>> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c >> > >>> @@ -0,0 +1,17 @@ >> > >>> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ >> > >>> +/* { dg-options "-O2 -mavx2" } */ >> > >>> + >> > >>> +struct B >> > >>> +{ >> > >>> + char a[12]; >> > >>> + int b; >> > >>> +}; >> > >>> + >> > >>> +struct B >> > >>> +f2 (void) >> > >>> +{ >> > >>> + struct B x = {}; >> > >>> + return x; >> > >>> +} >> > >>> + >> > >>> +/* { dg-final { scan-assembler-not >> > >>> "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */ >> > >>> -- >> > >>> 2.19.2 >> > >>> >> > > My fix triggered a latent bug in ix86_find_max_used_stack_alignment. >> > > Here is the fix. OK for trunk? >> > > >> > > Thanks. >> > > >> > > -- H.J. >> > > >> > > >> > > 0001-x86-Properly-check-stack-reference.patch >> > > >> > > From 83f0b37f287ed198a3b50e2be6b0f7f5c154020e Mon Sep 17 00:00:00 >> > > 2001 >> > > From: "H.J. Lu" <hjl.to...@gmail.com> >> > > Date: Fri, 14 Dec 2018 12:21:02 -0800 >> > > Subject: [PATCH] x86: Properly check stack reference >> > > >> > > A latent bug in ix86_find_max_used_stack_alignment was uncovered by >> > > the >> > > fix for PR target/88483, which caused: >> > > >> > > FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t >> > > ]*\\$-16,[\\t ]*%esp >> > > >> > > on i386. ix86_find_max_used_stack_alignment failed to notice stack >> > > reference via non-stack/frame registers and missed stack alignment >> > > requirement. We should track all registers which may reference stack >> > > by checking registers set from stack referencing registers. >> > > >> > > Tested on i686 and x86-64 with >> > > >> > > --with-arch=native --with-cpu=native >> > > >> > > on AVX512 machine. Tested on i686 and x86-64 without >> > > >> > > --with-arch=native --with-cpu=native >> > > >> > > on x86-64 machine. >> > > >> > > PR target/88483 >> > > * config/i386/i386.c (ix86_stack_referenced_p): New function. >> > > (ix86_find_max_used_stack_alignment): Call >> > > ix86_stack_referenced_p >> > > to check if stack is referenced. >> > > --- >> > > gcc/config/i386/i386.c | 43 >> > > ++++++++++++++++++++++++++++++++++++++---- >> > > 1 file changed, 39 insertions(+), 4 deletions(-) >> > > >> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >> > > index 4599ca2a7d5..bf93ec3722f 100644 >> > > --- a/gcc/config/i386/i386.c >> > > +++ b/gcc/config/i386/i386.c >> > > @@ -12777,6 +12777,18 @@ output_probe_stack_range (rtx reg, rtx end) >> > > return ""; >> > > } >> > > >> > > +/* Return true if OP references stack frame though one of registers >> > > + in STACK_REF_REGS. */ >> > > + >> > > +static bool >> > > +ix86_stack_referenced_p (const_rtx op, rtx *stack_ref_regs) >> > > +{ >> > > + for (int i = 0; i < LAST_REX_INT_REG; i++) >> > > + if (stack_ref_regs[i] && reg_mentioned_p (stack_ref_regs[i], >> > > op)) >> > > + return true; >> > > + return false; >> > > +} >> > > + >> > > /* Return true if stack frame is required. Update STACK_ALIGNMENT >> > > to the largest alignment, in bits, of stack slot used if stack >> > > frame is required and CHECK_STACK_SLOT is true. */ >> > > @@ -12801,6 +12813,12 @@ ix86_find_max_used_stack_alignment (unsigned >> > > int &stack_alignment, >> > > >> > > bool require_stack_frame = false; >> > > >> > > + /* Array of hard registers which reference stack frame. */ >> > > + rtx stack_ref_regs[LAST_REX_INT_REG]; >> > > + memset (stack_ref_regs, 0, sizeof (stack_ref_regs)); >> > > + stack_ref_regs[STACK_POINTER_REGNUM] = stack_pointer_rtx; >> > > + stack_ref_regs[FRAME_POINTER_REGNUM] = frame_pointer_rtx; >> > > + >> > > FOR_EACH_BB_FN (bb, cfun) >> > > { >> > > rtx_insn *insn; >> > > @@ -12811,16 +12829,33 @@ ix86_find_max_used_stack_alignment (unsigned >> > > int &stack_alignment, >> > > { >> > > require_stack_frame = true; >> > > >> > > + rtx body = PATTERN (insn); >> > > + if (GET_CODE (body) == SET) >> > > + { >> > > + /* If a register is set from a stack referencing >> > > + register, it is a stack referencing register. */ >> > > + rtx dest = SET_DEST (body); >> > > + if (REG_P (dest) && !MEM_P (SET_SRC (body))) >> > > + { >> > > + int regno = REGNO (dest); >> > > + gcc_assert (regno < FIRST_PSEUDO_REGISTER); >> > > + if (GENERAL_REGNO_P (regno)) >> > > + { >> > > + add_to_hard_reg_set (&set_up_by_prologue, >> > > Pmode, >> > > + regno); >> > > + stack_ref_regs[regno] = dest; >> > > + } >> > > + } >> > > + } >> > > + >> > > if (check_stack_slot) >> > > { >> > > /* Find the maximum stack alignment. */ >> > > subrtx_iterator::array_type array; >> > > FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) >> > > if (MEM_P (*iter) >> > > - && (reg_mentioned_p (stack_pointer_rtx, >> > > - *iter) >> > > - || reg_mentioned_p (frame_pointer_rtx, >> > > - *iter))) >> > > + && ix86_stack_referenced_p (*iter, >> > > + stack_ref_regs)) >> > > { >> > > unsigned int alignment = MEM_ALIGN (*iter); >> > > if (alignment > stack_alignment) >> > This just does a linear scan of the blocks and insns within the block >> > building up STACK_REF_REGS as we go. >> > >> > The problem is there's no guarantee that we're visiting the blocks in >> > execution order. So we might see an insn that indirectly references >> > the >> > stack reg *before* we see the insn which had the copy from the stack >> > pointer. >> > >> > Or am I missing something? >> > >> >> You are right. We must be conservative in this case Here is the >> updated patch. If there may be indirect stack references, stop and >> restore the original stack alignment. >> >> I am testing it now. >> > > This is updated patch. Tested on i686 and x86-64 with > > --with-arch=native --with-cpu=native > > on AVX512 machine. Tested on i686 and x86-64 without > > --with-arch=native --with-cpu=native > > on x86-64 machine. > > OK for trunk?
No. Please revert the original patch (it was not a regression), and let's revisit this for gcc-10. Uros. > Thanks. > > > H.J. > ---- > A latent bug in ix86_find_max_used_stack_alignment was uncovered by the > fix for PR target/88483, which caused: > > FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t ]*\\$-16,[\\t > ]*%esp > > on i386. ix86_find_max_used_stack_alignment failed to notice stack > reference via non-stack/frame registers and missed stack alignment > requirement. There's no guarantee that we're visiting the blocks in > execution order. If there may be indirect stack references, stop and > restore the original stack alignment. Update stack alignment only if > we check stack alignment. > > PR target/88483 > * config/i386/i386.c (ix86_stack_referenced_p): New function. > (ix86_find_max_used_stack_alignment): If there may be indirect > stack references, stop and restore the original stack alignment. > --- > gcc/config/i386/i386.c | 76 ++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 74 insertions(+), 2 deletions(-) > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index b6dea0c061d..e28a8d50069 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -12777,6 +12777,57 @@ output_probe_stack_range (rtx reg, rtx end) > return ""; > } > > +/* Return true if destination register of SET_BODY may reference stack. > */ > + > +static bool > +ix86_stack_referenced_p (const_rtx set_body) > +{ > + rtx set_dest, set_src; > + int i; > + > + switch (GET_CODE (set_body)) > + { > + case SET: > + /* If a register is set from a stack stack address, it may be > + used to reference stack indirectly. */ > + set_dest = SET_DEST (set_body); > + if (SUBREG_P (set_dest)) > + set_dest = SUBREG_REG (set_dest); > + if (!REG_P (set_dest) || !GENERAL_REG_P (set_dest)) > + return false; > + set_src = SET_SRC (set_body); > + if (address_operand (set_src, VOIDmode)) > + { > + struct ix86_address parts; > + if (!ix86_decompose_address (set_src, &parts)) > + return false; > + if (parts.base) > + { > + if (SUBREG_P (parts.base)) > + parts.base = SUBREG_REG (parts.base); > + return (parts.base == stack_pointer_rtx > + || parts.base == frame_pointer_rtx); > + } > + if (parts.index) > + { > + if (SUBREG_P (parts.index)) > + parts.index = SUBREG_REG (parts.index); > + return (parts.index == stack_pointer_rtx > + || parts.index == frame_pointer_rtx); > + } > + } > + break; > + case PARALLEL: > + for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) > + if (ix86_stack_referenced_p (XVECEXP (set_body, 0, i))) > + return true; > + /* FALLTHROUGH */ > + default: > + break; > + } > + return false; > +} > + > /* Return true if stack frame is required. Update STACK_ALIGNMENT > to the largest alignment, in bits, of stack slot used if stack > frame is required and CHECK_STACK_SLOT is true. */ > @@ -12795,8 +12846,13 @@ ix86_find_max_used_stack_alignment (unsigned int > &stack_alignment, > add_to_hard_reg_set (&set_up_by_prologue, Pmode, > HARD_FRAME_POINTER_REGNUM); > > - /* The preferred stack alignment is the minimum stack alignment. */ > - if (stack_alignment > crtl->preferred_stack_boundary) > + /* Save the original stack alignment. */ > + unsigned int original_stack_alignment = stack_alignment; > + > + /* The preferred stack alignment is the minimum stack alignment. > + Update stack alignment only if we check stack alignment. */ > + if (check_stack_slot > + && stack_alignment > crtl->preferred_stack_boundary) > stack_alignment = crtl->preferred_stack_boundary; > > bool require_stack_frame = false; > @@ -12811,6 +12867,16 @@ ix86_find_max_used_stack_alignment (unsigned int > &stack_alignment, > { > require_stack_frame = true; > > + if (ix86_stack_referenced_p (PATTERN (insn))) > + { > + /* There's no guarantee that we're visiting the > + blocks in execution order. If there may be > + indirect stack references, stop and restore > + the original stack alignment. */ > + stack_alignment = original_stack_alignment; > + goto done; > + } > + > if (check_stack_slot) > { > /* Find the maximum stack alignment. */ > @@ -12827,9 +12893,15 @@ ix86_find_max_used_stack_alignment (unsigned int > &stack_alignment, > stack_alignment = alignment; > } > } > + else > + { > + /* We are done. */ > + goto done; > + } > } > } > > +done: > return require_stack_frame; > } > > -- > 2.19.2 > >