On Tue, Jul 2, 2024 at 11:24 AM Hongyu Wang <hongyu.w...@intel.com> wrote:
>
> Hi,
>
> According to APX spec, the pushp/popp pairs should be matched,
> otherwise the PPX hint cannot take effect and cause performance loss.
>
> In the ix86_expand_epilogue, there are several optimizations that may
> cause the epilogue using mov to restore the regs. Check if PPX applied
> and prevent usage of mov/leave in the epilogue.
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu.
>
> Ok for trunk?
Ok.
>
> gcc/ChangeLog:
>
>         * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
>         flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
>         (ix86_emit_save_regs): Emit ppx is available only when
>         TARGET_APX_PPX && !crtl->calls_eh_return.
>         (ix86_expand_epilogue): Don't restore reg using mov when
>         apx_ppx_used flag is true.
>         * config/i386/i386.h (struct machine_frame_state):
>         Add apx_ppx_used flag.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ppx-2.c: New test.
>         * gcc.target/i386/apx-ppx-3.c: Likewise.
> ---
>  gcc/config/i386/i386.cc                   | 13 +++++++++----
>  gcc/config/i386/i386.h                    |  4 ++++
>  gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++++++
>  4 files changed, 34 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index bd7411190af..99def8d4a77 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
>  {
>    int regno;
>    rtx_insn *insn;
> +  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
>
>    if (!TARGET_APX_PUSH2POP2
>        || !ix86_can_use_push2pop2 ()
> @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
>         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
>           {
>             insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> -                                       TARGET_APX_PPX));
> +                                       use_ppx));
>             RTX_FRAME_RELATED_P (insn) = 1;
>           }
>      }
> @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
>                                                               regno_list[0]),
>                                                  gen_rtx_REG (word_mode,
>                                                               regno_list[1]),
> -                                                TARGET_APX_PPX));
> +                                                use_ppx));
>                     RTX_FRAME_RELATED_P (insn) = 1;
>                     rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
>
> @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
>             else
>               {
>                 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> -                                           TARGET_APX_PPX));
> +                                           use_ppx));
>                 RTX_FRAME_RELATED_P (insn) = 1;
>                 aligned = true;
>               }
> @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
>         {
>           insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
>                                                    regno_list[0]),
> -                                     TARGET_APX_PPX));
> +                                     use_ppx));
>           RTX_FRAME_RELATED_P (insn) = 1;
>         }
>      }
> @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
>        if (!frame.save_regs_using_mov)
>         {
>           ix86_emit_save_regs ();
> +         m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
>           int_registers_saved = true;
>           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
>         }
> @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
>    /* SEH requires the use of pops to identify the epilogue.  */
>    else if (TARGET_SEH)
>      restore_regs_via_mov = false;
> +  /* If we already save reg with pushp, don't use move at epilogue.  */
> +  else if (m->fs.apx_ppx_used)
> +    restore_regs_via_mov = false;
>    /* If we're only restoring one register and sp cannot be used then
>       using a move instruction to restore the register since it's
>       less work than reloading sp and popping the register.  */
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 147b12cd014..0c5292e1d64 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
>       The flags realigned and sp_realigned are mutually exclusive.  */
>    BOOL_BITFIELD sp_realigned : 1;
>
> +  /* When APX_PPX used in prologue, force epilogue to emit
> +  popp instead of move and leave.  */
> +  BOOL_BITFIELD apx_ppx_used : 1;
> +
>    /* If sp_realigned is set, this is the last valid offset from the CFA
>       that can be used for access with the frame pointer.  */
>    HOST_WIDE_INT sp_realigned_fp_last;
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c 
> b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> new file mode 100644
> index 00000000000..42a95340b55
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> +
> +/* { dg-final { scan-assembler "pushp" } } */
> +/* { dg-final { scan-assembler "popp" } } */
> +/* { dg-final { scan-assembler-not "leave" } } */
> +
> +extern int bar (int a);
> +extern int *q;
> +
> +void foo (int *a)
> +{
> +  q[2] = bar (q[1]);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c 
> b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> new file mode 100644
> index 00000000000..76931fbe294
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mapx-features=ppx" } */
> +
> +/* { dg-final { scan-assembler-not "pushp" } } */
> +/* { dg-final { scan-assembler-not "popp" } } */
> +
> +#include "eh_return-2.c"
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to