On Tue, May 12, 2020 at 10:07 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > Update STV pass to properly count cost of XMM register push. In 32-bit > mode, to convert XMM register push in DImode, we do an XMM store in > DImode, followed by 2 memory pushes in SImode, instead of 2 integer > register pushes in SImode. To convert XM register push in SImode, we > do an XMM register to integer register move in SImode, followed an > integer register push in SImode, instead of an integer register push in > SImode. In 64-bit mode, we do an XMM register to integer register move > in SImode or DImode, followed an integer register push in SImode or > DImode, instead of an integer register push SImode or DImode. > > Tested on Linux/x86 and Linux/x86-64.
I think it is better to implement XMM register pushes, and split them after reload to a sequence of: (set (reg:P SP_REG) (plus:P SP_REG) (const_int -8))) (set (match_dup 0) (match_dup 1)) This is definitelly better that tripsthrough memory to stack. There are plenty of examples of fake pushes in i386.md, just grep for "%%% Kill this when call" Uros. > OK for master? > > Thanks. > > H.J. > -- > gcc/ > PR target/95021 > * config/i386/i386-features.c > (general_scalar_chain::general_scalar_chain): Initialize > n_sse_push. > (general_scalar_chain::mark_dual_mode_def): Add a df_ref > argument for reference. Increment n_sse_push for XMM register > push. > (timode_scalar_chain::mark_dual_mode_def): Add a dummy df_ref > argument. > (scalar_chain::analyze_register_chain): Pass chain->ref > to mark_dual_mode_def. > (general_scalar_chain::compute_convert_gain): Count cost of > XMM register push. > * config/i386/i386-features.h (scalar_chain::mark_dual_mode_def): > Add a df_ref argument. > (general_scalar_chain): Add n_sse_push. > (general_scalar_chain::mark_dual_mode_def): Add a df_ref > argument. > (timode_scalar_chain::mark_dual_mode_def): Add a df_ref > argument. > > gcc/testsuite/ > > PR target/95021 > * gcc.target/i386/pr95021-1.c: New test. > * gcc.target/i386/pr95021-2.c: Likewise. > --- > gcc/config/i386/i386-features.c | 33 ++++++++++++++++++++--- > gcc/config/i386/i386-features.h | 7 ++--- > gcc/testsuite/gcc.target/i386/pr95021-1.c | 25 +++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr95021-2.c | 25 +++++++++++++++++ > 4 files changed, 83 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-2.c > > diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c > index 78fb373db6e..c85ab41350c 100644 > --- a/gcc/config/i386/i386-features.c > +++ b/gcc/config/i386/i386-features.c > @@ -326,6 +326,7 @@ general_scalar_chain::general_scalar_chain (enum > machine_mode smode_, > insns_conv = BITMAP_ALLOC (NULL); > n_sse_to_integer = 0; > n_integer_to_sse = 0; > + n_sse_push = 0; > } > > general_scalar_chain::~general_scalar_chain () > @@ -337,7 +338,7 @@ general_scalar_chain::~general_scalar_chain () > conversion. */ > > void > -general_scalar_chain::mark_dual_mode_def (df_ref def) > +general_scalar_chain::mark_dual_mode_def (df_ref def, df_ref ref) > { > gcc_assert (DF_REF_REG_DEF_P (def)); > > @@ -356,6 +357,12 @@ general_scalar_chain::mark_dual_mode_def (df_ref def) > if (!reg_new) > return; > n_sse_to_integer++; > + rtx_insn *insn = DF_REF_INSN (ref); > + rtx set = single_set (insn); > + /* Count XMM register push. */ Count XMM register pushes. > + if (set > + && push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))) > + n_sse_push++; > } > > if (dump_file) > @@ -367,7 +374,7 @@ general_scalar_chain::mark_dual_mode_def (df_ref def) > /* For TImode conversion, it is unused. */ > > void > -timode_scalar_chain::mark_dual_mode_def (df_ref) > +timode_scalar_chain::mark_dual_mode_def (df_ref, df_ref) > { > gcc_unreachable (); > } > @@ -408,14 +415,14 @@ scalar_chain::analyze_register_chain (bitmap > candidates, df_ref ref) > if (dump_file) > fprintf (dump_file, " r%d def in insn %d isn't convertible\n", > DF_REF_REGNO (chain->ref), uid); > - mark_dual_mode_def (chain->ref); > + mark_dual_mode_def (chain->ref, chain->ref); > } > else > { > if (dump_file) > fprintf (dump_file, " r%d use in insn %d isn't convertible\n", > DF_REF_REGNO (chain->ref), uid); > - mark_dual_mode_def (ref); > + mark_dual_mode_def (ref, chain->ref); > } > } > } > @@ -627,6 +634,24 @@ general_scalar_chain::compute_convert_gain () > are at the moment. */ > cost += n_integer_to_sse * ix86_cost->sse_to_integer; > > + /* In 32-bit mode, to convert XMM register push in DImode, we do > + an XMM store in DImode, followed by 2 memory pushes in SImode, In 32-bit mode, DImode XMM register push is converted to a DImode XMM store, followed by 2 SImode pushes from memory ... > + instead of 2 integer register pushes in SImode. To convert XM > + register push in SImode, we do an XMM register to integer register > + move in SImode, followed an integer register push in SImode, > + instead of an integer register push in SImode. In 64-bit mode, ... instead of 2 SImode pushes from integer registers. SImode XMM register push is converted to a move to an integer register, followed by a SImode push from an integer register. > + we do an XMM register to integer register move in SImode or DImode, > + followed an integer register push in SImode or DImode, instead of > + an integer register push SImode or DImode. */ ... SImode or DImode XMM register push is converted to a move to an integer register, followed by a SImode push from memory instead of a SImode or DImode integer register push. > + if (n_sse_push) > + { > + if (TARGET_64BIT || m == 1) > + cost += n_sse_push * ix86_cost->sse_to_integer; > + else > + cost += n_sse_push * (ix86_cost->sse_store[sse_cost_idx] > + + m * ix86_cost->int_load[2]); > + } > + > if (dump_file) > fprintf (dump_file, " Registers conversion cost: %d\n", cost); > > diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h > index ee6b10f12c1..3358015dc89 100644 > --- a/gcc/config/i386/i386-features.h > +++ b/gcc/config/i386/i386-features.h > @@ -159,7 +159,7 @@ class scalar_chain > private: > void add_insn (bitmap candidates, unsigned insn_uid); > void analyze_register_chain (bitmap candidates, df_ref ref); > - virtual void mark_dual_mode_def (df_ref def) = 0; > + virtual void mark_dual_mode_def (df_ref def, df_ref ref) = 0; > virtual void convert_insn (rtx_insn *insn) = 0; > virtual void convert_registers () = 0; > }; > @@ -175,7 +175,8 @@ class general_scalar_chain : public scalar_chain > bitmap insns_conv; > unsigned n_sse_to_integer; > unsigned n_integer_to_sse; > - void mark_dual_mode_def (df_ref def); > + unsigned n_sse_push; > + void mark_dual_mode_def (df_ref def, df_ref ref); > void convert_insn (rtx_insn *insn); > void convert_op (rtx *op, rtx_insn *insn); > void convert_reg (rtx_insn *insn, rtx dst, rtx src); > @@ -193,7 +194,7 @@ class timode_scalar_chain : public scalar_chain > int compute_convert_gain () { return 1; } > > private: > - void mark_dual_mode_def (df_ref def); > + void mark_dual_mode_def (df_ref def, df_ref ref); > void fix_debug_reg_uses (rtx reg); > void convert_insn (rtx_insn *insn); > /* We don't convert registers to difference size. */ > diff --git a/gcc/testsuite/gcc.target/i386/pr95021-1.c > b/gcc/testsuite/gcc.target/i386/pr95021-1.c > new file mode 100644 > index 00000000000..b47ba332e81 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr95021-1.c > @@ -0,0 +1,25 @@ > +/* { dg-do compile { target ia32 } } */ > +/* { dg-options "-O2 -msse2 -mstv -mregparm=0 -W" } */ > +/* { dg-final { scan-assembler-not "xmm" } } */ > + > +typedef long long a; > +struct __jmp_buf_tag { > +}; > +typedef struct __jmp_buf_tag sigjmp_buf[1]; > +sigjmp_buf jmp_buf; > +int __sigsetjmp (sigjmp_buf, int); > +typedef struct { > + a target; > +} b; > +extern a *target_p; > +b *c; > +extern void foo (a); > +void > +d(void) > +{ > + if (__sigsetjmp(jmp_buf, 1)) { > + a target = *target_p; > + c->target = target; > + foo(target); > + } > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr95021-2.c > b/gcc/testsuite/gcc.target/i386/pr95021-2.c > new file mode 100644 > index 00000000000..b213b5db072 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr95021-2.c > @@ -0,0 +1,25 @@ > +/* { dg-do compile { target ia32 } } */ > +/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */ > +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */ > + > +typedef long long a; > +struct __jmp_buf_tag { > +}; > +typedef struct __jmp_buf_tag sigjmp_buf[1]; > +sigjmp_buf jmp_buf; > +int __sigsetjmp (sigjmp_buf, int); > +typedef struct { > + a target; > +} b; > +extern a *target_p; > +b *c; > +extern void foo (a); > +void > +d(void) > +{ > + if (__sigsetjmp(jmp_buf, 1)) { > + a target = *target_p; > + c->target = target; > + foo(target); > + } > +} > -- > 2.26.2 >