On Tue, May 12, 2020 at 10:07 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> Update STV pass to properly count cost of XMM register push.  In 32-bit
> mode, to convert XMM register push in DImode, we do an XMM store in
> DImode, followed by 2 memory pushes in SImode, instead of 2 integer
> register pushes in SImode.  To convert XM register push in SImode, we
> do an XMM register to integer register move in SImode, followed an
> integer register push in SImode, instead of an integer register push in
> SImode.  In 64-bit mode, we do an XMM register to integer register move
> in SImode or DImode, followed an integer register push in SImode or
> DImode, instead of an integer register push SImode or DImode.
>
> Tested on Linux/x86 and Linux/x86-64.

I think it is better to implement XMM register pushes, and split them
after reload to a sequence of:

(set (reg:P SP_REG) (plus:P SP_REG) (const_int -8)))
(set (match_dup 0) (match_dup 1))

This is definitelly better that tripsthrough memory to stack.

There are plenty of examples of fake pushes in i386.md, just grep for
"%%% Kill this when call"

Uros.

> OK for master?
>
> Thanks.
>
> H.J.
> --
> gcc/
>         PR target/95021
>         * config/i386/i386-features.c
>         (general_scalar_chain::general_scalar_chain): Initialize
>         n_sse_push.
>         (general_scalar_chain::mark_dual_mode_def): Add a df_ref
>         argument for reference.  Increment n_sse_push for XMM register
>         push.
>         (timode_scalar_chain::mark_dual_mode_def): Add a dummy df_ref
>         argument.
>         (scalar_chain::analyze_register_chain): Pass chain->ref
>         to mark_dual_mode_def.
>         (general_scalar_chain::compute_convert_gain): Count cost of
>         XMM register push.
>         * config/i386/i386-features.h (scalar_chain::mark_dual_mode_def):
>         Add a df_ref argument.
>         (general_scalar_chain): Add n_sse_push.
>         (general_scalar_chain::mark_dual_mode_def): Add a df_ref
>         argument.
>         (timode_scalar_chain::mark_dual_mode_def): Add a df_ref
>         argument.
>
> gcc/testsuite/
>
>         PR target/95021
>         * gcc.target/i386/pr95021-1.c: New test.
>         * gcc.target/i386/pr95021-2.c: Likewise.
> ---
>  gcc/config/i386/i386-features.c           | 33 ++++++++++++++++++++---
>  gcc/config/i386/i386-features.h           |  7 ++---
>  gcc/testsuite/gcc.target/i386/pr95021-1.c | 25 +++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr95021-2.c | 25 +++++++++++++++++
>  4 files changed, 83 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-2.c
>
> diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
> index 78fb373db6e..c85ab41350c 100644
> --- a/gcc/config/i386/i386-features.c
> +++ b/gcc/config/i386/i386-features.c
> @@ -326,6 +326,7 @@ general_scalar_chain::general_scalar_chain (enum 
> machine_mode smode_,
>    insns_conv = BITMAP_ALLOC (NULL);
>    n_sse_to_integer = 0;
>    n_integer_to_sse = 0;
> +  n_sse_push = 0;
>  }
>
>  general_scalar_chain::~general_scalar_chain ()
> @@ -337,7 +338,7 @@ general_scalar_chain::~general_scalar_chain ()
>     conversion.  */
>
>  void
> -general_scalar_chain::mark_dual_mode_def (df_ref def)
> +general_scalar_chain::mark_dual_mode_def (df_ref def, df_ref ref)
>  {
>    gcc_assert (DF_REF_REG_DEF_P (def));
>
> @@ -356,6 +357,12 @@ general_scalar_chain::mark_dual_mode_def (df_ref def)
>        if (!reg_new)
>         return;
>        n_sse_to_integer++;
> +      rtx_insn *insn = DF_REF_INSN (ref);
> +      rtx set = single_set (insn);
> +      /* Count XMM register push.  */

Count XMM register pushes.

> +      if (set
> +         && push_operand (SET_DEST (set), GET_MODE (SET_DEST (set))))
> +       n_sse_push++;
>      }
>
>    if (dump_file)
> @@ -367,7 +374,7 @@ general_scalar_chain::mark_dual_mode_def (df_ref def)
>  /* For TImode conversion, it is unused.  */
>
>  void
> -timode_scalar_chain::mark_dual_mode_def (df_ref)
> +timode_scalar_chain::mark_dual_mode_def (df_ref, df_ref)
>  {
>    gcc_unreachable ();
>  }
> @@ -408,14 +415,14 @@ scalar_chain::analyze_register_chain (bitmap 
> candidates, df_ref ref)
>           if (dump_file)
>             fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
>                      DF_REF_REGNO (chain->ref), uid);
> -         mark_dual_mode_def (chain->ref);
> +         mark_dual_mode_def (chain->ref, chain->ref);
>         }
>        else
>         {
>           if (dump_file)
>             fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
>                      DF_REF_REGNO (chain->ref), uid);
> -         mark_dual_mode_def (ref);
> +         mark_dual_mode_def (ref, chain->ref);
>         }
>      }
>  }
> @@ -627,6 +634,24 @@ general_scalar_chain::compute_convert_gain ()
>       are at the moment.  */
>    cost += n_integer_to_sse * ix86_cost->sse_to_integer;
>
> +  /* In 32-bit mode, to convert XMM register push in DImode, we do
> +     an XMM store in DImode, followed by 2 memory pushes in SImode,

In 32-bit mode, DImode XMM register push is converted to a DImode XMM
store, followed by 2 SImode pushes from memory ...

> +     instead of 2 integer register pushes in SImode.  To convert XM
> +     register push in SImode, we do an XMM register to integer register
> +     move in SImode, followed an integer register push in SImode,
> +     instead of an integer register push in SImode.  In 64-bit mode,

... instead of 2 SImode pushes from integer registers.  SImode XMM
register push is converted to a move to an integer register, followed
by a SImode push from an integer register.

> +     we do an XMM register to integer register move in SImode or DImode,
> +     followed an integer register push in SImode or DImode, instead of
> +     an integer register push SImode or DImode.   */

... SImode or DImode XMM register push is converted to a move to an
integer register, followed by a SImode push from memory instead of  a
SImode or DImode integer register push.

> +  if (n_sse_push)
> +    {
> +      if (TARGET_64BIT || m == 1)
> +       cost += n_sse_push * ix86_cost->sse_to_integer;
> +      else
> +       cost += n_sse_push * (ix86_cost->sse_store[sse_cost_idx]
> +                             + m * ix86_cost->int_load[2]);
> +    }
> +
>    if (dump_file)
>      fprintf (dump_file, "  Registers conversion cost: %d\n", cost);
>
> diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
> index ee6b10f12c1..3358015dc89 100644
> --- a/gcc/config/i386/i386-features.h
> +++ b/gcc/config/i386/i386-features.h
> @@ -159,7 +159,7 @@ class scalar_chain
>   private:
>    void add_insn (bitmap candidates, unsigned insn_uid);
>    void analyze_register_chain (bitmap candidates, df_ref ref);
> -  virtual void mark_dual_mode_def (df_ref def) = 0;
> +  virtual void mark_dual_mode_def (df_ref def, df_ref ref) = 0;
>    virtual void convert_insn (rtx_insn *insn) = 0;
>    virtual void convert_registers () = 0;
>  };
> @@ -175,7 +175,8 @@ class general_scalar_chain : public scalar_chain
>    bitmap insns_conv;
>    unsigned n_sse_to_integer;
>    unsigned n_integer_to_sse;
> -  void mark_dual_mode_def (df_ref def);
> +  unsigned n_sse_push;
> +  void mark_dual_mode_def (df_ref def, df_ref ref);
>    void convert_insn (rtx_insn *insn);
>    void convert_op (rtx *op, rtx_insn *insn);
>    void convert_reg (rtx_insn *insn, rtx dst, rtx src);
> @@ -193,7 +194,7 @@ class timode_scalar_chain : public scalar_chain
>    int compute_convert_gain () { return 1; }
>
>   private:
> -  void mark_dual_mode_def (df_ref def);
> +  void mark_dual_mode_def (df_ref def, df_ref ref);
>    void fix_debug_reg_uses (rtx reg);
>    void convert_insn (rtx_insn *insn);
>    /* We don't convert registers to difference size.  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95021-1.c 
> b/gcc/testsuite/gcc.target/i386/pr95021-1.c
> new file mode 100644
> index 00000000000..b47ba332e81
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95021-1.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target ia32 } } */
> +/* { dg-options "-O2 -msse2 -mstv -mregparm=0 -W" } */
> +/* { dg-final { scan-assembler-not "xmm" } } */
> +
> +typedef long long a;
> +struct __jmp_buf_tag   {
> +};
> +typedef struct __jmp_buf_tag sigjmp_buf[1];
> +sigjmp_buf jmp_buf;
> +int __sigsetjmp (sigjmp_buf, int);
> +typedef struct {
> +  a target;
> +} b;
> +extern a *target_p;
> +b *c;
> +extern void foo (a);
> +void
> +d(void)
> +{
> +  if (__sigsetjmp(jmp_buf, 1)) {
> +    a target = *target_p;
> +    c->target = target;
> +    foo(target);
> +  }
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr95021-2.c 
> b/gcc/testsuite/gcc.target/i386/pr95021-2.c
> new file mode 100644
> index 00000000000..b213b5db072
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95021-2.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target ia32 } } */
> +/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */
> +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */
> +
> +typedef long long a;
> +struct __jmp_buf_tag   {
> +};
> +typedef struct __jmp_buf_tag sigjmp_buf[1];
> +sigjmp_buf jmp_buf;
> +int __sigsetjmp (sigjmp_buf, int);
> +typedef struct {
> +  a target;
> +} b;
> +extern a *target_p;
> +b *c;
> +extern void foo (a);
> +void
> +d(void)
> +{
> +  if (__sigsetjmp(jmp_buf, 1)) {
> +    a target = *target_p;
> +    c->target = target;
> +    foo(target);
> +  }
> +}
> --
> 2.26.2
>

Reply via email to