On Wed, Sep 15, 2021 at 10:10 AM <lili....@intel.com> wrote:
>
> From: "H.J. Lu" <hjl.to...@gmail.com>
>
> Check TARGET_USE_VECTOR_FP_CONVERTS or TARGET_USE_VECTOR_CONVERTS when
> handling avx_partial_xmm_update attribute.  Don't convert AVX partial
> XMM register update if vector packed SSE conversion should be used.
>
> gcc/
>
>         PR target/101900
>         * config/i386/i386-features.c (remove_partial_avx_dependency):
>         Check TARGET_USE_VECTOR_FP_CONVERTS and TARGET_USE_VECTOR_CONVERTS
>         before generating vxorps.
>
> gcc/testsuite/
>
>         PR target/101900
>         * gcc.target/i386/pr101900-1.c: New test.
>         * gcc.target/i386/pr101900-2.c: Likewise.
>         * gcc.target/i386/pr101900-3.c: Likewise.
> ---
>  gcc/config/i386/i386-features.c            | 21 ++++++++++++++++++---
>  gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++
>  4 files changed, 73 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c
>
> diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
> index 5a99ea7c046..ae5ea02a002 100644
> --- a/gcc/config/i386/i386-features.c
> +++ b/gcc/config/i386/i386-features.c
> @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void)
>               != AVX_PARTIAL_XMM_UPDATE_TRUE)
>             continue;
>
> -         if (!v4sf_const0)
> -           v4sf_const0 = gen_reg_rtx (V4SFmode);
> -
>           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
>              SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
>              vec_merge with subreg.  */
>           rtx src = SET_SRC (set);
>           rtx dest = SET_DEST (set);
>           machine_mode dest_mode = GET_MODE (dest);
> +         machine_mode src_mode;
> +
> +         if (TARGET_USE_VECTOR_FP_CONVERTS)
> +           {
> +             src_mode = GET_MODE (XEXP (src, 0));
> +             if (src_mode == E_SFmode || src_mode == E_DFmode)
> +               continue;
> +           }
> +
> +         if (TARGET_USE_VECTOR_CONVERTS)
> +           {
> +             src_mode = GET_MODE (XEXP (src, 0));
> +             if (src_mode == E_SImode || src_mode == E_DImode)
> +               continue;
> +           }
> +
> +         if (!v4sf_const0)
> +           v4sf_const0 = gen_reg_rtx (V4SFmode);

Please move the initialization of src_mode to the top of the new hunk, like:

machine_mode src_mode = GET_MODE (XEXP (src, 0));
switch (src_mode)
{
  case E_SFmode:
  case E_DFmode:
    if (TARGET_USE_VECTOR_FP_CONVERTS)
      continue;
    break;
  case E_SImode:
  case E_DImode:
    if (TARGET_USE_VECTOR_CONVERTS)
      continue;
    break;
  default:
    break;
}

or something like the above.

Uros.

>
>           rtx zero;
>           machine_mode dest_vecmode;
> diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c b/gcc/testsuite/gcc.target/i386/pr101900-1.c
> new file mode 100644
> index 00000000000..0a45f8e340a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts" } */
> +
> +extern float f;
> +extern double d;
> +extern int i;
> +
> +void
> +foo (void)
> +{
> +  d = f;
> +  f = i;
> +}
> +
> +/* { dg-final { scan-assembler "vcvtps2pd" } } */
> +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */
> +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
> +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c b/gcc/testsuite/gcc.target/i386/pr101900-2.c
> new file mode 100644
> index 00000000000..c8b2d1da5ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_converts" } */
> +
> +extern float f;
> +extern double d;
> +extern int i;
> +
> +void
> +foo (void)
> +{
> +  d = f;
> +  f = i;
> +}
> +
> +/* { dg-final { scan-assembler "vcvtss2sd" } } */
> +/* { dg-final { scan-assembler "vcvtdq2ps" } } */
> +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
> +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c b/gcc/testsuite/gcc.target/i386/pr101900-3.c
> new file mode 100644
> index 00000000000..6ee565b5bd4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */
> +
> +extern float f;
> +extern double d;
> +extern int i;
> +
> +void
> +foo (void)
> +{
> +  d = f;
> +  f = i;
> +}
> +
> +/* { dg-final { scan-assembler "vcvtps2pd" } } */
> +/* { dg-final { scan-assembler "vcvtdq2ps" } } */
> +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
> +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
> +/* { dg-final { scan-assembler-not "vxorps" } } */
> --
> 2.17.1
>

Reply via email to