I forgot to pass --in-reply-to when running git send-email.

> I was thinking about:
>
> --cut here--
> @@ -19194,9 +19194,17 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
>       return NO_REGS;
>     }
>
> -  /* Prefer SSE regs only, if we can use them for math.  */
> +  /* Prefer SSE if we can use them for math.  Also allow integer regs
> +     when moves between register units are cheap.  */
>   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
> -    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
> +    {
> +      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
> +         && TARGET_INTER_UNIT_MOVES_TO_VEC
> +         && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
> +       return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
> +      else
> +       return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
> +    }
>
>   /* Generally when we see PLUS here, it's the function invariant
>      (plus soft-fp const_int).  Which can only be computed into general
> --cut here--
>
> So, the INT_SSE class is allowed when inter-unit moves are enabled. The
> patch also takes care of 64-bit moves, which are expensive on 32-bit
> targets.

I like your version; here is the updated patch.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} w/ and w/o -march=k8.


On Mon, Dec 6, 2021 at 11:41 AM liuhongt <hongtao....@intel.com> wrote:
>
> Allow integer registers when moves between integer and SSE registers are cheap.
>
> 2021-12-06  Hongtao Liu  <hongtao....@intel.com>
>             Uroš Bizjak  <ubiz...@gmail.com>
> gcc/ChangeLog:
>
>         PR target/95740
>         * config/i386/i386.c (ix86_preferred_reload_class): Allow
>         integer regs when moves between register units are cheap.
>         * config/i386/i386.h (INT_SSE_CLASS_P): New.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr95740.c: New test.
> ---
>  gcc/config/i386/i386.c                  | 12 ++++++++++--
>  gcc/config/i386/i386.h                  |  2 ++
>  gcc/testsuite/gcc.target/i386/pr95740.c | 26 +++++++++++++++++++++++++
>  3 files changed, 38 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95740.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 80fee627358..e3c2e294988 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19194,9 +19194,17 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
>        return NO_REGS;
>      }
>
> -  /* Prefer SSE regs only, if we can use them for math.  */
> +  /* Prefer SSE if we can use them for math.  Also allow integer regs
> +     when moves between register units are cheap.  */
>    if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
> -    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
> +    {
> +      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
> +         && TARGET_INTER_UNIT_MOVES_TO_VEC
> +         && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
> +       return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
> +      else
> +       return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
> +    }
>
>    /* Generally when we see PLUS here, it's the function invariant
>       (plus soft-fp const_int).  Which can only be computed into general
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 2fda1e0686e..ec90e47904b 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1283,6 +1283,8 @@ enum reg_class
>    reg_class_subset_p ((CLASS), FLOAT_REGS)
>  #define SSE_CLASS_P(CLASS) \
>    reg_class_subset_p ((CLASS), ALL_SSE_REGS)
> +#define INT_SSE_CLASS_P(CLASS) \
> +  reg_class_subset_p ((CLASS), INT_SSE_REGS)
>  #define MMX_CLASS_P(CLASS) \
>    ((CLASS) == MMX_REGS)
>  #define MASK_CLASS_P(CLASS) \
> diff --git a/gcc/testsuite/gcc.target/i386/pr95740.c b/gcc/testsuite/gcc.target/i386/pr95740.c
> new file mode 100644
> index 00000000000..7ecd71ba8c1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95740.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-msse2 -O2 -mtune=generic -mtune-ctrl=use_incdec -masm=att -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-times {(?n)movd[\t ]*%xmm0.*%eax} 1 } } */
> +/* { dg-final { scan-assembler-times {(?n)incl[\t ]*%eax} 1 } } */
> +/* { dg-final { scan-assembler-times {(?n)movq[\t ]*%xmm0.*%rax} 1 } } */
> +/* { dg-final { scan-assembler-times {(?n)incq[\t ]*%rax} 1 } } */
> +
> +int
> +foo (float a)
> +{
> +  union{
> +    int b;
> +    float a;}u;
> +  u.a = a;
> +  return u.b + 1;
> +}
> +
> +long long
> +foo1 (double a)
> +{
> +  union{
> +    long long b;
> +    double a;}u;
> +  u.a = a;
> +  return u.b + 1;
> +}
> --
> 2.18.2
>


--
BR,
Hongtao

Reply via email to