On Sat, Jun 8, 2019 at 12:14 AM Richard Sandiford <richard.sandif...@arm.com> wrote: > > "H.J. Lu" <hjl.to...@gmail.com> writes: > > On Fri, Jun 7, 2019 at 1:22 AM Richard Biener <rguent...@suse.de> wrote: > >> > >> On Fri, 7 Jun 2019, Richard Sandiford wrote: > >> > >> > "H.J. Lu" <hjl.to...@gmail.com> writes: > >> > > locate_and_pad_parm is called when expanding function call from > >> > > initialize_argument_information and when generating function body > >> > > from assign_parm_find_entry_rtl: > >> > > > >> > > /* Remember if the outgoing parameter requires extra alignment on the > >> > > calling function side. */ > >> > > if (crtl->stack_alignment_needed < boundary) > >> > > crtl->stack_alignment_needed = boundary; > >> > > if (crtl->preferred_stack_boundary < boundary) > >> > > crtl->preferred_stack_boundary = boundary; > >> > > > >> > > stack_alignment_needed and preferred_stack_boundary should be updated > >> > > only when expanding function call, not when generating function body. > >> > > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for > >> > > expanding function call. Update stack_alignment_needed and > >> > > preferred_stack_boundary if the parameter is passed on stack and only > >> > > when expanding function call. > >> > > > >> > > Tested on Linux/x86-64. > >> > > > >> > > OK for trunk? > >> > > > >> > > Thanks. > >> > > > >> > > -- > >> > > H.J. > >> > > > >> > > From e91e20ad8e10373db2c6d8f99a3da0bbf46c5c34 Mon Sep 17 00:00:00 2001 > >> > > From: "H.J. Lu" <hjl.to...@gmail.com> > >> > > Date: Wed, 5 Jun 2019 12:55:19 -0700 > >> > > Subject: [PATCH] Update preferred_stack_boundary only when expanding > >> > > function > >> > > call > >> > > > >> > > locate_and_pad_parm is called when expanding function call from > >> > > initialize_argument_information and when generating function body > >> > > from assign_parm_find_entry_rtl: > >> > > > >> > > /* Remember if the outgoing parameter requires extra alignment on the > >> > > calling function side. */ > >> > > if (crtl->stack_alignment_needed < boundary) > >> > > crtl->stack_alignment_needed = boundary; > >> > > if (crtl->preferred_stack_boundary < boundary) > >> > > crtl->preferred_stack_boundary = boundary; > >> > > > >> > > stack_alignment_needed and preferred_stack_boundary should be updated > >> > > only when expanding function call, not when generating function body. > >> > > Add an argument, outgoing_p, to locate_and_pad_parm to indicate for > >> > > expanding function call. Update stack_alignment_needed and > >> > > preferred_stack_boundary if the parameter is passed on stack and only > >> > > when expanding function call. > >> > > > >> > > Tested on Linux/x86-64. > >> > > > >> > > gcc/ > >> > > > >> > > PR rtl-optimization/90765 > >> > > * function.c (assign_parm_find_entry_rtl): Pass false to > >> > > locate_and_pad_parm. > >> > > (locate_and_pad_parm): Add an argument, outgoing_p, to indicate > >> > > for expanding function call. Update stack_alignment_needed and > >> > > preferred_stack_boundary only if outgoing_p is true and the > >> > > the parameter is passed on stack. > >> > > * function.h (locate_and_pad_parm): Add an argument, outgoing_p, > >> > > defaulting to true. > >> > > > >> > > gcc/testsuite/ > >> > > > >> > > PR rtl-optimization/90765 > >> > > * gcc.target/i386/pr90765-1.c: New test. > >> > > * gcc.target/i386/pr90765-2.c: Likewise. > >> > > --- > >> > > gcc/function.c | 21 +++++++++++++-------- > >> > > gcc/function.h | 3 ++- > >> > > gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 +++++++++++ > >> > > gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++++ > >> > > 4 files changed, 44 insertions(+), 9 deletions(-) > >> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c > >> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c > >> > > > >> > > diff --git a/gcc/function.c b/gcc/function.c > >> > > index e30ee259bec..9b6673f6f0d 100644 > >> > > --- a/gcc/function.c > >> > > +++ b/gcc/function.c > >> > > @@ -2601,7 +2601,7 @@ assign_parm_find_entry_rtl (struct > >> > > assign_parm_data_all *all, > >> > > locate_and_pad_parm (data->promoted_mode, data->passed_type, > >> > > in_regs, > >> > > all->reg_parm_stack_space, > >> > > entry_parm ? data->partial : 0, > >> > > current_function_decl, > >> > > - &all->stack_args_size, &data->locate); > >> > > + &all->stack_args_size, &data->locate, false); > >> > > > >> > > /* Update parm_stack_boundary if this parameter is passed in the > >> > > stack. */ > >> > > @@ -3954,7 +3954,8 @@ locate_and_pad_parm (machine_mode passed_mode, > >> > > tree type, int in_regs, > >> > > int reg_parm_stack_space, int partial, > >> > > tree fndecl ATTRIBUTE_UNUSED, > >> > > struct args_size *initial_offset_ptr, > >> > > - struct locate_and_pad_arg_data *locate) > >> > > + struct locate_and_pad_arg_data *locate, > >> > > + bool outgoing_p) > >> > > { > >> > > tree sizetree; > >> > > pad_direction where_pad; > >> > > @@ -4021,12 +4022,16 @@ locate_and_pad_parm (machine_mode passed_mode, > >> > > tree type, int in_regs, > >> > > } > >> > > } > >> > > > >> > > - /* Remember if the outgoing parameter requires extra alignment on > >> > > the > >> > > - calling function side. */ > >> > > - if (crtl->stack_alignment_needed < boundary) > >> > > - crtl->stack_alignment_needed = boundary; > >> > > - if (crtl->preferred_stack_boundary < boundary) > >> > > - crtl->preferred_stack_boundary = boundary; > >> > > + if (outgoing_p && !in_regs) > >> > > + { > >> > > + /* Remember if the outgoing parameter requires extra alignment > >> > > on > >> > > + the calling function side if this parameter is passed in the > >> > > + stack. */ > >> > > + if (crtl->stack_alignment_needed < boundary) > >> > > + crtl->stack_alignment_needed = boundary; > >> > > + if (crtl->preferred_stack_boundary < boundary) > >> > > + crtl->preferred_stack_boundary = boundary; > >> > > + } > >> > > >> > Just my opinion, but I think this shows that the code isn't really > >> > in the right place. IMO locate_and_pad_parm should just describe > >> > the ABI and is too low-level to be modifying global state like this. > >> > > >> > I think we could instead do this by walking over the argument vectors > >> > in a subroutine of expand_call and emit_library_call_value_1. > >> > expand_call is also where we do: > >> > > >> > /* Ensure current function's preferred stack boundary is at least > >> > what we need. Stack alignment may also increase preferred stack > >> > boundary. */ > >> > if (crtl->preferred_stack_boundary < preferred_stack_boundary) > >> > crtl->preferred_stack_boundary = preferred_stack_boundary; > >> > else > >> > preferred_stack_boundary = crtl->preferred_stack_boundary; > >> > >> Agreed - that would be much cleaner. > >> > > > > Here is the updated patch to add update_stack_alignment_for_call. > > > > Tested on Linux/x86-64. OK for trunk? > > > > Thanks. > > > > -- > > H.J. > > > > From 00d81edc1e62c43c2084483df055ea68f4047679 Mon Sep 17 00:00:00 2001 > > From: "H.J. Lu" <hjl.to...@gmail.com> > > Date: Wed, 5 Jun 2019 12:55:19 -0700 > > Subject: [PATCH] Update preferred_stack_boundary only when expanding > > function > > call > > > > locate_and_pad_parm is called when expanding function call from > > initialize_argument_information and when generating function body > > from assign_parm_find_entry_rtl: > > > > /* Remember if the outgoing parameter requires extra alignment on the > > calling function side. */ > > if (crtl->stack_alignment_needed < boundary) > > crtl->stack_alignment_needed = boundary; > > if (crtl->preferred_stack_boundary < boundary) > > crtl->preferred_stack_boundary = boundary; > > > > stack_alignment_needed and preferred_stack_boundary should be updated > > only when expanding function call, not when generating function body. > > > > Add update_stack_alignment_for_call to update stack alignment when > > outgoing parameter is passed in the stack. > > > > gcc/ > > > > PR rtl-optimization/90765 > > * calls.c (update_stack_alignment_for_call): New function. > > (initialize_argument_information): Call > > update_stack_alignment_for_call when outgoing parameter is > > passed in the stack. > > (emit_library_call_value_1): Likewise. > > * function.c (locate_and_pad_parm): Don't update > > stack_alignment_needed and preferred_stack_boundary. > > > > gcc/testsuite/ > > > > PR rtl-optimization/90765 > > * gcc.target/i386/pr90765-1.c: New test. > > * gcc.target/i386/pr90765-2.c: Likewise. > > --- > > gcc/calls.c | 33 ++++++++++++++++++----- > > gcc/function.c | 7 ----- > > gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 ++++++++ > > gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 +++++++++++++ > > 4 files changed, 55 insertions(+), 14 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c > > > > diff --git a/gcc/calls.c b/gcc/calls.c > > index c8a42680041..8ba82fbb6c0 100644 > > --- a/gcc/calls.c > > +++ b/gcc/calls.c > > @@ -1841,6 +1841,19 @@ maybe_complain_about_tail_call (tree call_expr, > > const char *reason) > > error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s", reason); > > } > > > > +/* Update stack alignment when the parameter is passed in the stack > > + since the outgoing parameter requires extra alignment on the calling > > + function side. */ > > + > > +static void > > +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) > > +{ > > + if (crtl->stack_alignment_needed < locate->boundary) > > + crtl->stack_alignment_needed = locate->boundary; > > + if (crtl->preferred_stack_boundary < locate->boundary) > > + crtl->preferred_stack_boundary = locate->boundary; > > +} > > + > > /* Fill in ARGS_SIZE and ARGS array based on the parameters found in > > CALL_EXPR EXP. > > > > @@ -2162,15 +2175,18 @@ initialize_argument_information (int num_actuals > > ATTRIBUTE_UNUSED, > > if (args[i].reg == 0 || args[i].partial != 0 > > || reg_parm_stack_space > 0 > > || args[i].pass_on_stack) > > - locate_and_pad_parm (mode, type, > > + { > > + locate_and_pad_parm (mode, type, > > #ifdef STACK_PARMS_IN_REG_PARM_AREA > > - 1, > > + 1, > > #else > > - args[i].reg != 0, > > + args[i].reg != 0, > > #endif > > - reg_parm_stack_space, > > - args[i].pass_on_stack ? 0 : args[i].partial, > > - fndecl, args_size, &args[i].locate); > > + reg_parm_stack_space, > > + args[i].pass_on_stack ? 0 : args[i].partial, > > + fndecl, args_size, &args[i].locate); > > + update_stack_alignment_for_call (&args[i].locate); > > + } > > #ifdef BLOCK_REG_PADDING > > else > > /* The argument is passed entirely in registers. See at which > > @@ -4861,7 +4877,10 @@ emit_library_call_value_1 (int retval, rtx orgfun, > > rtx value, > > > > if (argvec[count].reg == 0 || argvec[count].partial != 0 > > || reg_parm_stack_space > 0) > > - args_size.constant += argvec[count].locate.size.constant; > > + { > > + args_size.constant += argvec[count].locate.size.constant; > > + update_stack_alignment_for_call (&argvec[count].locate); > > + } > > > > targetm.calls.function_arg_advance (args_so_far, Pmode, (tree) 0, > > true); > > There are two calls to locate_and_pad_parm in emit_library_call_value_1, > but I think this only handles the second. > > IMO it'd be clearer/safer to iterate over the argument vector separately > rather than do it on the fly. In the case of expand_call it would make > sense to do it near the crtl->preferred_stack_boundary stuff quoted above, > so that this is all in one place. >
Fixed. There is the updated patch. Tested on Linux/x86-64. OK for trunk? Thanks. -- H.J.
From 053632d7724ad3299008fc83c64c5d401c1b2ebe Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Wed, 5 Jun 2019 12:55:19 -0700 Subject: [PATCH] Update preferred_stack_boundary only when expanding function call locate_and_pad_parm is called when expanding function call from initialize_argument_information and when generating function body from assign_parm_find_entry_rtl: /* Remember if the outgoing parameter requires extra alignment on the calling function side. */ if (crtl->stack_alignment_needed < boundary) crtl->stack_alignment_needed = boundary; if (crtl->preferred_stack_boundary < boundary) crtl->preferred_stack_boundary = boundary; stack_alignment_needed and preferred_stack_boundary should be updated only when expanding function call, not when generating function body. Add update_stack_alignment_for_call to update stack alignment when outgoing parameter is passed in the stack. gcc/ PR rtl-optimization/90765 * calls.c (update_stack_alignment_for_call): New function. (expand_call): Call update_stack_alignment_for_call when outgoing parameter is passed in the stack. (emit_library_call_value_1): Likewise. * function.c (locate_and_pad_parm): Don't update stack_alignment_needed and preferred_stack_boundary. gcc/testsuite/ PR rtl-optimization/90765 * gcc.target/i386/pr90765-1.c: New test. * gcc.target/i386/pr90765-2.c: Likewise. --- gcc/calls.c | 25 +++++++++++++++++++++++ gcc/function.c | 7 ------- gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 ++++++++++ gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++ 4 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c diff --git a/gcc/calls.c b/gcc/calls.c index c8a42680041..6ab138e7bb0 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -3226,6 +3226,19 @@ can_implement_as_sibling_call_p (tree exp, return true; } +/* Update stack alignment when the parameter is passed in the stack + since the outgoing parameter requires extra alignment on the calling + function side. */ + +static void +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) +{ + if (crtl->stack_alignment_needed < locate->boundary) + crtl->stack_alignment_needed = locate->boundary; + if (crtl->preferred_stack_boundary < locate->boundary) + crtl->preferred_stack_boundary = locate->boundary; +} + /* Generate all the code for a CALL_EXPR exp and return an rtx for its value. Store the value in TARGET (specified as an rtx) if convenient. @@ -3703,6 +3716,12 @@ expand_call (tree exp, rtx target, int ignore) /* Ensure current function's preferred stack boundary is at least what we need. Stack alignment may also increase preferred stack boundary. */ + for (i = 0; i < num_actuals; i++) + if (reg_parm_stack_space > 0 + || args[i].reg == 0 + || args[i].partial != 0 + || args[i].pass_on_stack) + update_stack_alignment_for_call (&args[i].locate); if (crtl->preferred_stack_boundary < preferred_stack_boundary) crtl->preferred_stack_boundary = preferred_stack_boundary; else @@ -4961,6 +4980,12 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true); } + for (int i = 0; i < nargs; i++) + if (reg_parm_stack_space > 0 + || argvec[i].reg == 0 + || argvec[i].partial != 0) + update_stack_alignment_for_call (&argvec[i].locate); + /* If this machine requires an external definition for library functions, write one out. */ assemble_external_libcall (fun); diff --git a/gcc/function.c b/gcc/function.c index e30ee259bec..45b65dc0fd2 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -4021,13 +4021,6 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs, } } - /* Remember if the outgoing parameter requires extra alignment on the - calling function side. */ - if (crtl->stack_alignment_needed < boundary) - crtl->stack_alignment_needed = boundary; - if (crtl->preferred_stack_boundary < boundary) - crtl->preferred_stack_boundary = boundary; - if (ARGS_GROW_DOWNWARD) { locate->slot_offset.constant = -initial_offset_ptr->constant; diff --git a/gcc/testsuite/gcc.target/i386/pr90765-1.c b/gcc/testsuite/gcc.target/i386/pr90765-1.c new file mode 100644 index 00000000000..178c3ff8054 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90765-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */ + +typedef int __v16si __attribute__ ((__vector_size__ (64))); + +void +foo (__v16si x, int i0, int i1, int i2, int i3, int i4, int i5, __v16si *p) +{ + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr90765-2.c b/gcc/testsuite/gcc.target/i386/pr90765-2.c new file mode 100644 index 00000000000..45cf1f03747 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90765-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-final { scan-assembler "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */ +/* { dg-skip-if "" { x86_64-*-mingw* } } */ + +typedef int __v16si __attribute__ ((__vector_size__ (64))); + +extern void foo (__v16si, __v16si, __v16si, __v16si, __v16si, __v16si, + __v16si, __v16si, __v16si, int, int, int, int, int, + int, __v16si *); + +extern __v16si x, y; + +void +bar (void) +{ + foo (x, x, x, x, x, x, x, x, x, 0, 1, 2, 3, 4, 5, &y); +} -- 2.20.1