On Wed, Aug 9, 2023 at 3:48 AM liuhongt <hongtao....@intel.com> wrote:
>
> > Please rather do it in a more self-descriptive way, as proposed in the
> > attached patch. You won't need a comment then.
> >
>
> Adjusted in V2 patch.
>
> Don't access leaf 7 subleaf 1 unless subleaf 0 says it is
> supported via EAX.
>
> Intel documentation says invalid subleaves return 0. We had been
> relying on that behavior instead of checking the max sublef number.

Probably a documentation bug, even Wikipedia says about CPUID:

EAX=7, ECX=0: Extended Features

This returns extended feature flags in EBX, ECX, and EDX. Returns the
maximum ECX value for EAX=7 in EAX.

> It appears that some Sandy Bridge CPUs return at least the subleaf 0
> EDX value for subleaf 1. Best guess is that this is a bug in a
> microcode patch since all of the bits we're seeing set in EDX were
> introduced after Sandy Bridge was originally released.
>
> This is causing avxvnniint16 to be incorrectly enabled with
> -march=native on these CPUs.
>
> gcc/ChangeLog:
>
>         * common/config/i386/cpuinfo.h (get_available_features): Check
>         EAX for valid subleaf before use CPUID.

OK for mainline and backports.

Thanks,
Uros.

> ---
>  gcc/common/config/i386/cpuinfo.h | 82 +++++++++++++++++---------------
>  1 file changed, 43 insertions(+), 39 deletions(-)
>
> diff --git a/gcc/common/config/i386/cpuinfo.h 
> b/gcc/common/config/i386/cpuinfo.h
> index 30ef0d334ca..9fa4dec2a7e 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -663,6 +663,7 @@ get_available_features (struct __processor_model 
> *cpu_model,
>    unsigned int max_cpuid_level = cpu_model2->__cpu_max_level;
>    unsigned int eax, ebx;
>    unsigned int ext_level;
> +  unsigned int subleaf_level;
>
>    /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv.  */
>  #define XCR_XFEATURE_ENABLED_MASK      0x0
> @@ -762,7 +763,7 @@ get_available_features (struct __processor_model 
> *cpu_model,
>    /* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */
>    if (max_cpuid_level >= 7)
>      {
> -      __cpuid_count (7, 0, eax, ebx, ecx, edx);
> +      __cpuid_count (7, 0, subleaf_level, ebx, ecx, edx);
>        if (ebx & bit_BMI)
>         set_feature (FEATURE_BMI);
>        if (ebx & bit_SGX)
> @@ -874,45 +875,48 @@ get_available_features (struct __processor_model 
> *cpu_model,
>             set_feature (FEATURE_AVX512FP16);
>         }
>
> -      __cpuid_count (7, 1, eax, ebx, ecx, edx);
> -      if (eax & bit_HRESET)
> -       set_feature (FEATURE_HRESET);
> -      if (eax & bit_CMPCCXADD)
> -       set_feature(FEATURE_CMPCCXADD);
> -      if (edx & bit_PREFETCHI)
> -       set_feature (FEATURE_PREFETCHI);
> -      if (eax & bit_RAOINT)
> -       set_feature (FEATURE_RAOINT);
> -      if (avx_usable)
> -       {
> -         if (eax & bit_AVXVNNI)
> -           set_feature (FEATURE_AVXVNNI);
> -         if (eax & bit_AVXIFMA)
> -           set_feature (FEATURE_AVXIFMA);
> -         if (edx & bit_AVXVNNIINT8)
> -           set_feature (FEATURE_AVXVNNIINT8);
> -         if (edx & bit_AVXNECONVERT)
> -           set_feature (FEATURE_AVXNECONVERT);
> -         if (edx & bit_AVXVNNIINT16)
> -           set_feature (FEATURE_AVXVNNIINT16);
> -         if (eax & bit_SM3)
> -           set_feature (FEATURE_SM3);
> -         if (eax & bit_SHA512)
> -           set_feature (FEATURE_SHA512);
> -         if (eax & bit_SM4)
> -           set_feature (FEATURE_SM4);
> -       }
> -      if (avx512_usable)
> -       {
> -         if (eax & bit_AVX512BF16)
> -           set_feature (FEATURE_AVX512BF16);
> -       }
> -      if (amx_usable)
> +      if (subleaf_level >= 1)
>         {
> -         if (eax & bit_AMX_FP16)
> -           set_feature (FEATURE_AMX_FP16);
> -         if (edx & bit_AMX_COMPLEX)
> -           set_feature (FEATURE_AMX_COMPLEX);
> +         __cpuid_count (7, 1, eax, ebx, ecx, edx);
> +         if (eax & bit_HRESET)
> +           set_feature (FEATURE_HRESET);
> +         if (eax & bit_CMPCCXADD)
> +           set_feature(FEATURE_CMPCCXADD);
> +         if (edx & bit_PREFETCHI)
> +           set_feature (FEATURE_PREFETCHI);
> +         if (eax & bit_RAOINT)
> +           set_feature (FEATURE_RAOINT);
> +         if (avx_usable)
> +           {
> +             if (eax & bit_AVXVNNI)
> +               set_feature (FEATURE_AVXVNNI);
> +             if (eax & bit_AVXIFMA)
> +               set_feature (FEATURE_AVXIFMA);
> +             if (edx & bit_AVXVNNIINT8)
> +               set_feature (FEATURE_AVXVNNIINT8);
> +             if (edx & bit_AVXNECONVERT)
> +               set_feature (FEATURE_AVXNECONVERT);
> +             if (edx & bit_AVXVNNIINT16)
> +               set_feature (FEATURE_AVXVNNIINT16);
> +             if (eax & bit_SM3)
> +               set_feature (FEATURE_SM3);
> +             if (eax & bit_SHA512)
> +               set_feature (FEATURE_SHA512);
> +             if (eax & bit_SM4)
> +               set_feature (FEATURE_SM4);
> +           }
> +         if (avx512_usable)
> +           {
> +             if (eax & bit_AVX512BF16)
> +               set_feature (FEATURE_AVX512BF16);
> +           }
> +         if (amx_usable)
> +           {
> +             if (eax & bit_AMX_FP16)
> +               set_feature (FEATURE_AMX_FP16);
> +             if (edx & bit_AMX_COMPLEX)
> +               set_feature (FEATURE_AMX_COMPLEX);
> +           }
>         }
>      }
>
> --
> 2.31.1
>

Reply via email to