On Thu, Apr 15, 2021 at 6:26 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Thu, Apr 15, 2021 at 9:14 AM Uros Bizjak <ubiz...@gmail.com> wrote:
> >
> > On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> > >
> > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics
> > > without enabling SSE vector instructions.
> >
> > There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and the current
> > situation reflects that correctly.
>
> CRC32 is similar to POPCNT which was originally in SSE4.2.   Now POPCNT

It is not similar: POPCNT has its own CPUID flag and can be enabled
independently of SSE4.2.

> is a separate feature which is also enabled by SSE4.2. Enabling CRC32 only
> with SSE4.2 makes it impossible to use CRC32 with -mgeneral-regs-only. This
> patch addresses this issue the same way as POPCNT.

CRC32 doesn't have its own CPUID flag, so PTA_CRC32 is pointless.

OTOH, the situation is similar with MONITOR and MWAIT. These are
enabled with SSE3 and don't use XMM registers. Also somewhat similar
is FISTTP, but there is no intrinsic for this insn.

Uros.

>
> > [1] https://en.wikipedia.org/wiki/SSE4
> >
> > Uros.
> >
> > >         * config/i386/gnu-property.c
> > >         (file_end_indicate_exec_stack_and_gnu_property): Also check
> > >         TARGET_CRC32 for GNU_PROPERTY_X86_ISA_1_V2.
> > >         * config/i386/i386-c.c (ix86_target_macros_internal): Define
> > >         __CRC32__ for -mcrc32.
> > >         * config/i386/i386-options.c (ix86_option_override_internal):
> > >         Handle PTA_CRC32.  Enable crc32 instruction for -msse4.2.
> > >         * config/i386/i386.h (PTA_CRC32): New.
> > >         (PTA_X86_64_V2): Add PTA_CRC32.
> > >         (PTA_NEHALEM): Likewise.
> > >         * config/i386/i386.md (sse4_2_crc32<mode>): Remove TARGET_SSE4_2
> > >         check.
> > >         (sse4_2_crc32di): Likewise.
> > >         * config/i386/ia32intrin.h: Use crc32 target option for CRC32
> > >         intrinsics.
> > > ---
> > >  gcc/config/i386/gnu-property.c |  1 +
> > >  gcc/config/i386/i386-c.c       |  2 ++
> > >  gcc/config/i386/i386-options.c |  8 ++++++++
> > >  gcc/config/i386/i386.h         |  6 ++++--
> > >  gcc/config/i386/i386.md        |  4 ++--
> > >  gcc/config/i386/ia32intrin.h   | 28 ++++++++++++++--------------
> > >  6 files changed, 31 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/gnu-property.c b/gcc/config/i386/gnu-property.c
> > > index 4ba04403002..b6a3bdf62ce 100644
> > > --- a/gcc/config/i386/gnu-property.c
> > > +++ b/gcc/config/i386/gnu-property.c
> > > @@ -92,6 +92,7 @@ file_end_indicate_exec_stack_and_gnu_property (void)
> > >        /* GNU_PROPERTY_X86_ISA_1_V2.  */
> > >        if (TARGET_CMPXCHG16B
> > >           || (TARGET_64BIT && TARGET_SAHF)
> > > +         || TARGET_CRC32
> > >           || TARGET_POPCNT
> > >           || TARGET_SSE3
> > >           || TARGET_SSSE3
> > > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> > > index be46d0506ad..5ed0de006fb 100644
> > > --- a/gcc/config/i386/i386-c.c
> > > +++ b/gcc/config/i386/i386-c.c
> > > @@ -532,6 +532,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
> > >      def_or_undef (parse_in, "__LZCNT__");
> > >    if (isa_flag & OPTION_MASK_ISA_TBM)
> > >      def_or_undef (parse_in, "__TBM__");
> > > +  if (isa_flag & OPTION_MASK_ISA_CRC32)
> > > +    def_or_undef (parse_in, "__CRC32__");
> > >    if (isa_flag & OPTION_MASK_ISA_POPCNT)
> > >      def_or_undef (parse_in, "__POPCNT__");
> > >    if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
> > > diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
> > > index 91da2849c49..959ee163d2f 100644
> > > --- a/gcc/config/i386/i386-options.c
> > > +++ b/gcc/config/i386/i386-options.c
> > > @@ -2162,6 +2162,9 @@ ix86_option_override_internal (bool main_args_p,
> > >         if (((processor_alias_table[i].flags & PTA_CX16) != 0)
> > >             && !(opts->x_ix86_isa_flags2_explicit & 
> > > OPTION_MASK_ISA2_CX16))
> > >           opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_CX16;
> > > +       if (((processor_alias_table[i].flags & PTA_CRC32) != 0)
> > > +           && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CRC32))
> > > +         opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32;
> > >         if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 
> > > 0)
> > >             && !(opts->x_ix86_isa_flags_explicit & 
> > > OPTION_MASK_ISA_POPCNT))
> > >           opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
> > > @@ -2617,6 +2620,11 @@ ix86_option_override_internal (bool main_args_p,
> > >      opts->x_ix86_isa_flags
> > >        |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
> > >
> > > +  /* Enable crc32 instruction for -msse4.2.  */
> > > +  if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags))
> > > +    opts->x_ix86_isa_flags
> > > +      |= OPTION_MASK_ISA_CRC32 & ~opts->x_ix86_isa_flags_explicit;
> > > +
> > >    /* Enable lzcnt instruction for -mabm.  */
> > >    if (TARGET_ABM_P(opts->x_ix86_isa_flags))
> > >      opts->x_ix86_isa_flags
> > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > > index 97700d797a7..c50f9ab24fa 100644
> > > --- a/gcc/config/i386/i386.h
> > > +++ b/gcc/config/i386/i386.h
> > > @@ -2504,12 +2504,14 @@ constexpr wide_int_bitmask PTA_HRESET (0, 
> > > HOST_WIDE_INT_1U << 23);
> > >  constexpr wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24);
> > >  constexpr wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25);
> > >  constexpr wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26);
> > > +constexpr wide_int_bitmask PTA_CRC32 (0, HOST_WIDE_INT_1U << 27);
> > >
> > >  constexpr wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | 
> > > PTA_SSE
> > >    | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR;
> > >  constexpr wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE
> > >                                             & (~PTA_NO_SAHF))
> > > -  | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | 
> > > PTA_SSSE3;
> > > +  | PTA_CRC32 | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | 
> > > PTA_SSE4_2
> > > +  | PTA_SSSE3;
> > >  constexpr wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2
> > >    | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | 
> > > PTA_LZCNT
> > >    | PTA_MOVBE | PTA_XSAVE;
> > > @@ -2519,7 +2521,7 @@ constexpr wide_int_bitmask PTA_X86_64_V4 = 
> > > PTA_X86_64_V3
> > >  constexpr wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | 
> > > PTA_SSE2
> > >    | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
> > >  constexpr wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | 
> > > PTA_SSE4_2
> > > -  | PTA_POPCNT;
> > > +  | PTA_CRC32 | PTA_POPCNT;
> > >  constexpr wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL;
> > >  constexpr wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | 
> > > PTA_XSAVE
> > >    | PTA_XSAVEOPT;
> > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > > index 9ff35d9a607..1f1d74e6275 100644
> > > --- a/gcc/config/i386/i386.md
> > > +++ b/gcc/config/i386/i386.md
> > > @@ -20998,7 +20998,7 @@ (define_insn "sse4_2_crc32<mode>"
> > >           [(match_operand:SI 1 "register_operand" "0")
> > >            (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
> > >           UNSPEC_CRC32))]
> > > -  "TARGET_SSE4_2 || TARGET_CRC32"
> > > +  "TARGET_CRC32"
> > >    "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
> > >    [(set_attr "type" "sselog1")
> > >     (set_attr "prefix_rep" "1")
> > > @@ -21019,7 +21019,7 @@ (define_insn "sse4_2_crc32di"
> > >           [(match_operand:DI 1 "register_operand" "0")
> > >            (match_operand:DI 2 "nonimmediate_operand" "rm")]
> > >           UNSPEC_CRC32))]
> > > -  "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)"
> > > +  "TARGET_64BIT && TARGET_CRC32"
> > >    "crc32{q}\t{%2, %0|%0, %2}"
> > >    [(set_attr "type" "sselog1")
> > >     (set_attr "prefix_rep" "1")
> > > diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
> > > index 591394076cc..5422b0fc9e0 100644
> > > --- a/gcc/config/i386/ia32intrin.h
> > > +++ b/gcc/config/i386/ia32intrin.h
> > > @@ -51,11 +51,11 @@ __bswapd (int __X)
> > >
> > >  #ifndef __iamcu__
> > >
> > > -#ifndef __SSE4_2__
> > > +#ifndef __CRC32__
> > >  #pragma GCC push_options
> > > -#pragma GCC target("sse4.2")
> > > -#define __DISABLE_SSE4_2__
> > > -#endif /* __SSE4_2__ */
> > > +#pragma GCC target("crc32")
> > > +#define __DISABLE_CRC32__
> > > +#endif /* __CRC32__ */
> > >
> > >  /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
> > >  extern __inline unsigned int
> > > @@ -79,10 +79,10 @@ __crc32d (unsigned int __C, unsigned int __V)
> > >    return __builtin_ia32_crc32si (__C, __V);
> > >  }
> > >
> > > -#ifdef __DISABLE_SSE4_2__
> > > -#undef __DISABLE_SSE4_2__
> > > +#ifdef __DISABLE_CRC32__
> > > +#undef __DISABLE_CRC32__
> > >  #pragma GCC pop_options
> > > -#endif /* __DISABLE_SSE4_2__ */
> > > +#endif /* __DISABLE_CRC32__ */
> > >
> > >  #endif /* __iamcu__ */
> > >
> > > @@ -199,11 +199,11 @@ __bswapq (long long __X)
> > >    return __builtin_bswap64 (__X);
> > >  }
> > >
> > > -#ifndef __SSE4_2__
> > > +#ifndef __CRC32__
> > >  #pragma GCC push_options
> > > -#pragma GCC target("sse4.2")
> > > -#define __DISABLE_SSE4_2__
> > > -#endif /* __SSE4_2__ */
> > > +#pragma GCC target("crc32")
> > > +#define __DISABLE_CRC32__
> > > +#endif /* __CRC32__ */
> > >
> > >  /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
> > >  extern __inline unsigned long long
> > > @@ -213,10 +213,10 @@ __crc32q (unsigned long long __C, unsigned long 
> > > long __V)
> > >    return __builtin_ia32_crc32di (__C, __V);
> > >  }
> > >
> > > -#ifdef __DISABLE_SSE4_2__
> > > -#undef __DISABLE_SSE4_2__
> > > +#ifdef __DISABLE_CRC32__
> > > +#undef __DISABLE_CRC32__
> > >  #pragma GCC pop_options
> > > -#endif /* __DISABLE_SSE4_2__ */
> > > +#endif /* __DISABLE_CRC32__ */
> > >
> > >  /* 64bit popcnt */
> > >  extern __inline long long
> > > --
> > > 2.30.2
> > >
>
>
>
> --
> H.J.

Reply via email to