On Wed, Sep 30, 2020 at 2:27 PM Florian Weimer <f...@deneb.enyo.de> wrote: > > These micro-architecture levels are defined in the x86-64 psABI: > > https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9 > > PTA_NO_TUNE is introduced so that the new processor alias table entries > do not affect the CPU tuning setting in ix86_tune. > > The tests depend on the macros added in commit 92e652d8c21bd7e66cbb0f900 > ("i386: Define __LAHF_SAHF__ and __MOVBE__ macros, based on ISA flags"). > > gcc/: > PR target/97250 > * config/i386/i386.h (PTA_NO_TUNE, PTA_X86_64_BASELINE) > (PTA_X86_64_V2, PTA_X86_64_V3, PTA_X86_64_V4): New. > * common/config/i386/i386-common.c (processor_alias_table): > Add "x86-64-v2", "x86-64-v3", "x86-64-v4". > * config/i386/i386-options.c (ix86_option_override_internal): > Handle new PTA_NO_TUNE processor table entries. > * doc/invoke.texi (x86 Options): Document new -march values. > > gcc/testsuite/: > PR target/97250 > * gcc.target/i386/x86-64-v2.c: New test. > * gcc.target/i386/x86-64-v3.c: New test. > * gcc.target/i386/x86-64-v4.c: New test.
Perhaps you should also test for the newly introduced __LAHF_SAHF__ define? Uros. > --- > > Notes (not going to be committed): > > I struggled a bit to avoid ICEs when I used PROCESSOR_GENERIC > instead of PROCESSOR_K8 in the new processor alias table entries. In > the end, I think not resetting the tuning setting is the correct thing > to do. > > Test results on x86-64 (on Debian buster) look okay-ish to me. I see > lots of obviously unrelated FAILs. > > gcc/common/config/i386/i386-common.c | 10 ++- > gcc/config/i386/i386-options.c | 27 +++++-- > gcc/config/i386/i386.h | 11 ++- > gcc/doc/invoke.texi | 7 ++ > gcc/testsuite/gcc.target/i386/x86-64-v2.c | 113 > ++++++++++++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/x86-64-v3.c | 113 > ++++++++++++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/x86-64-v4.c | 113 > ++++++++++++++++++++++++++++++ > 7 files changed, 385 insertions(+), 9 deletions(-) > > diff --git a/gcc/common/config/i386/i386-common.c > b/gcc/common/config/i386/i386-common.c > index 10142149115..62a620b4430 100644 > --- a/gcc/common/config/i386/i386-common.c > +++ b/gcc/common/config/i386/i386-common.c > @@ -1795,9 +1795,13 @@ const pta processor_alias_table[] = > PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR, 0, P_NONE}, > {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, > PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR, 0, P_NONE}, > - {"x86-64", PROCESSOR_K8, CPU_K8, > - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, > - 0, P_NONE}, > + {"x86-64", PROCESSOR_K8, CPU_K8, PTA_X86_64_BASELINE, 0, P_NONE}, > + {"x86-64-v2", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V2 | PTA_NO_TUNE, > + 0, P_NONE}, > + {"x86-64-v3", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V3 | PTA_NO_TUNE, > + 0, P_NONE}, > + {"x86-64-v4", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V4 | PTA_NO_TUNE, > + 0, P_NONE}, > {"eden-x2", PROCESSOR_K8, CPU_K8, > PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR, > 0, P_NONE}, > diff --git 
a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c > index 597de533fbd..cf48a911798 100644 > --- a/gcc/config/i386/i386-options.c > +++ b/gcc/config/i386/i386-options.c > @@ -2058,10 +2058,25 @@ ix86_option_override_internal (bool main_args_p, > return false; > } > > + /* Only the x86-64 psABI defines the feature-only > + micro-architecture levels that use PTA_NO_TUNE. */ > + if ((processor_alias_table[i].flags & PTA_NO_TUNE) != 0 > + && (!TARGET_64BIT_P (opts->x_ix86_isa_flags) > + || opts->x_ix86_abi != SYSV_ABI)) > + { > + error (G_("%<%s%> architecture level is only defined" > + " for the x86-64 psABI"), opts->x_ix86_arch_string); > + return false; > + } > + > ix86_schedule = processor_alias_table[i].schedule; > ix86_arch = processor_alias_table[i].processor; > - /* Default cpu tuning to the architecture. */ > - ix86_tune = ix86_arch; > + > + /* Default cpu tuning to the architecture, unless the table > + entry requests not to do this. Used by the x86-64 psABI > + micro-architecture levels. */ > + if ((processor_alias_table[i].flags & PTA_NO_TUNE) == 0) > + ix86_tune = ix86_arch; > > if (((processor_alias_table[i].flags & PTA_MMX) != 0) > && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) > @@ -2384,7 +2399,8 @@ ix86_option_override_internal (bool main_args_p, > ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & > ix86_arch_mask); > > for (i = 0; i < pta_size; i++) > - if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) > + if (! 
strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name) > + && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0) > { > ix86_schedule = processor_alias_table[i].schedule; > ix86_tune = processor_alias_table[i].processor; > @@ -2428,8 +2444,9 @@ ix86_option_override_internal (bool main_args_p, > > auto_vec <const char *> candidates; > for (i = 0; i < pta_size; i++) > - if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) > - || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) > + if ((!TARGET_64BIT_P (opts->x_ix86_isa_flags) > + || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) > + && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0) > candidates.safe_push (processor_alias_table[i].name); > > #ifdef HAVE_LOCAL_CPU_DETECT > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index a449653cc3e..9a5de6a0e9c 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -2433,7 +2433,7 @@ const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << > 40); > const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41); > const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42); > const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43); > -/* Hole after PTA_MPX was removed. 
*/ > +const wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44); > const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45); > const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46); > const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47); > @@ -2476,6 +2476,15 @@ const wide_int_bitmask PTA_AMX_TILE(0, > HOST_WIDE_INT_1U << 19); > const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20); > const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21); > > +const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE > + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; > +const wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE & (~PTA_NO_SAHF)) > + | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; > +const wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 > + | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT > + | PTA_MOVBE | PTA_XSAVE; > +const wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 > + | PTA_AVX512F | PTA_AVX512BW | PTA_AVX512CD | PTA_AVX512DQ | PTA_AVX512VL; > const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 > | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; > const wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 9a4903306d0..ec1eb7f7bd7 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -29258,6 +29258,13 @@ of the selected instruction set. > @item x86-64 > A generic CPU with 64-bit extensions. > > +@item x86-64-v2 > +@itemx x86-64-v3 > +@itemx x86-64-v4 > +These choices for @var{cpu-type} select the corresponding > +micro-architecture level from the x86-64 psABI. They are only available > +when compiling for an x86-64 target that uses the System V psABI@. > + > @item i386 > Original Intel i386 CPU@. 
> > diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v2.c > b/gcc/testsuite/gcc.target/i386/x86-64-v2.c > new file mode 100644 > index 00000000000..65efc9d60fa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/x86-64-v2.c > @@ -0,0 +1,113 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mabi=sysv -march=x86-64-v2" } */ > + > +/* Verify that the CPU features required by x86-64-v2 are enabled. */ > + > +#ifndef __MMX__ > +# error __MMX__ not defined > +#endif > +#ifndef __SSE__ > +# error __SSE__ not defined > +#endif > +#ifndef __SSE2__ > +# error __SSE2__ not defined > +#endif > +#ifndef __POPCNT__ > +# error __POPCNT__ not defined > +#endif > +#ifndef __SSE3__ > +# error __SSE3__ not defined > +#endif > +#ifndef __SSE4_1__ > +# error __SSE4_1__ not defined > +#endif > +#ifndef __SSE4_2__ > +# error __SSE4_2__ not defined > +#endif > +#ifndef __SSSE3__ > +# error __SSSE3__ not defined > +#endif > +#ifdef __SSE4A__ > +# error __SSE4A__ defined > +#endif > +#ifdef __AVX__ > +# error __AVX__ defined > +#endif > +#ifdef __AVX2__ > +# error __AVX2__ defined > +#endif > +#ifdef __F16C__ > +# error __F16C__ defined > +#endif > +#ifdef __FMA__ > +# error __FMA__ defined > +#endif > +#ifdef __LZCNT__ > +# error __LZCNT__ defined > +#endif > +#ifdef __MOVBE__ > +# error __MOVBE__ defined > +#endif > +#ifdef __XSAVE__ > +# error __XSAVE__ defined > +#endif > +#ifdef __XSAVEC__ > +# error __XSAVEC__ defined > +#endif > +#ifdef __AVX512F__ > +# error __AVX512F__ defined > +#endif > +#ifdef __AVX512BW__ > +# error __AVX512BW__ defined > +#endif > +#ifdef __AVX512CD__ > +# error __AVX512CD__ defined > +#endif > +#ifdef __AVX512DQ__ > +# error __AVX512DQ__ defined > +#endif > +#ifdef __AVX512VL__ > +# error __AVX512VL__ defined > +#endif > +#ifdef __AVX512PF__ > +# error __AVX512PF__ defined > +#endif > +#ifdef __AVX512VBMI__ > +# error __AVX512VBMI__ defined > +#endif > +#ifdef __AVX512IFMA__ > +# error __AVX512IFMA__ defined > +#endif > +#ifdef 
__AVX512VNNIW__ > +# error __AVX512VNNIW__ defined > +#endif > +#ifdef __AVX512VBMI2__ > +# error __AVX512VBMI2__ defined > +#endif > +#ifdef __AVX5124FMAPS__ > +# error __AVX5124FMAPS__ defined > +#endif > +#ifdef __AVX5124BITALG__ > +# error __AVX5124BITALG__ defined > +#endif > +#ifdef __AVX5124VPOPCNTDQ__ > +# error __AVX5124VPOPCNTDQ__ defined > +#endif > +#ifdef __AVX5124BF16__ > +# error __AVX5124BF16__ defined > +#endif > +#ifdef __AVX512VP2INTERSECT__ > +# error __AVX512VP2INTERSECT__ defined > +#endif > +#ifdef __AVX512VNNI__ > +# error __AVX512VNNI__ defined > +#endif > +#ifdef __FMA4__ > +# error __FMA4__ defined > +#endif > +#ifdef __3dNOW__ > +# error __3dNOW__ defined > +#endif > +#ifdef __tune_k8__ > +# error __tune_k8__ defined > +#endif > diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v3.c > b/gcc/testsuite/gcc.target/i386/x86-64-v3.c > new file mode 100644 > index 00000000000..6865d427fcb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/x86-64-v3.c > @@ -0,0 +1,113 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mabi=sysv -march=x86-64-v3" } */ > + > +/* Verify that the CPU features required by x86-64-v3 are enabled. 
*/ > + > +#ifndef __MMX__ > +# error __MMX__ not defined > +#endif > +#ifndef __SSE__ > +# error __SSE__ not defined > +#endif > +#ifndef __SSE2__ > +# error __SSE2__ not defined > +#endif > +#ifndef __POPCNT__ > +# error __POPCNT__ not defined > +#endif > +#ifndef __SSE3__ > +# error __SSE3__ not defined > +#endif > +#ifndef __SSE4_1__ > +# error __SSE4_1__ not defined > +#endif > +#ifndef __SSE4_2__ > +# error __SSE4_2__ not defined > +#endif > +#ifndef __SSSE3__ > +# error __SSSE3__ not defined > +#endif > +#ifdef __SSE4A__ > +# error __SSE4A__ defined > +#endif > +#ifndef __AVX__ > +# error __AVX__ not defined > +#endif > +#ifndef __AVX2__ > +# error __AVX2__ not defined > +#endif > +#ifndef __F16C__ > +# error __F16C__ not defined > +#endif > +#ifndef __FMA__ > +# error __FMA__ not defined > +#endif > +#ifndef __LZCNT__ > +# error __LZCNT__ not defined > +#endif > +#ifndef __MOVBE__ > +# error __MOVBE__ not defined > +#endif > +#ifndef __XSAVE__ > +# error __XSAVE__ not defined > +#endif > +#ifdef __XSAVEC__ > +# error __XSAVEC__ defined > +#endif > +#ifdef __AVX512F__ > +# error __AVX512F__ defined > +#endif > +#ifdef __AVX512BW__ > +# error __AVX512BW__ defined > +#endif > +#ifdef __AVX512CD__ > +# error __AVX512CD__ defined > +#endif > +#ifdef __AVX512DQ__ > +# error __AVX512DQ__ defined > +#endif > +#ifdef __AVX512VL__ > +# error __AVX512VL__ defined > +#endif > +#ifdef __AVX512PF__ > +# error __AVX512PF__ defined > +#endif > +#ifdef __AVX512VBMI__ > +# error __AVX512VBMI__ defined > +#endif > +#ifdef __AVX512IFMA__ > +# error __AVX512IFMA__ defined > +#endif > +#ifdef __AVX512VNNIW__ > +# error __AVX512VNNIW__ defined > +#endif > +#ifdef __AVX512VBMI2__ > +# error __AVX512VBMI2__ defined > +#endif > +#ifdef __AVX5124FMAPS__ > +# error __AVX5124FMAPS__ defined > +#endif > +#ifdef __AVX5124BITALG__ > +# error __AVX5124BITALG__ defined > +#endif > +#ifdef __AVX5124VPOPCNTDQ__ > +# error __AVX5124VPOPCNTDQ__ defined > +#endif > +#ifdef __AVX5124BF16__ > +# 
error __AVX5124BF16__ defined > +#endif > +#ifdef __AVX512VP2INTERSECT__ > +# error __AVX512VP2INTERSECT__ defined > +#endif > +#ifdef __AVX512VNNI__ > +# error __AVX512VNNI__ defined > +#endif > +#ifdef __FMA4__ > +# error __FMA4__ defined > +#endif > +#ifdef __3dNOW__ > +# error __3dNOW__ defined > +#endif > +#ifdef __tune_k8__ > +# error __tune_k8__ defined > +#endif > diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v4.c > b/gcc/testsuite/gcc.target/i386/x86-64-v4.c > new file mode 100644 > index 00000000000..140fec7ac65 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/x86-64-v4.c > @@ -0,0 +1,113 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mabi=sysv -march=x86-64-v4" } */ > + > +/* Verify that the CPU features required by x86-64-v4 are enabled. */ > + > +#ifndef __MMX__ > +# error __MMX__ not defined > +#endif > +#ifndef __SSE__ > +# error __SSE__ not defined > +#endif > +#ifndef __SSE2__ > +# error __SSE2__ not defined > +#endif > +#ifndef __POPCNT__ > +# error __POPCNT__ not defined > +#endif > +#ifndef __SSE3__ > +# error __SSE3__ not defined > +#endif > +#ifndef __SSE4_1__ > +# error __SSE4_1__ not defined > +#endif > +#ifndef __SSE4_2__ > +# error __SSE4_2__ not defined > +#endif > +#ifndef __SSSE3__ > +# error __SSSE3__ not defined > +#endif > +#ifdef __SSE4A__ > +# error __SSE4A__ defined > +#endif > +#ifndef __AVX__ > +# error __AVX__ not defined > +#endif > +#ifndef __AVX2__ > +# error __AVX2__ not defined > +#endif > +#ifndef __F16C__ > +# error __F16C__ not defined > +#endif > +#ifndef __FMA__ > +# error __FMA__ not defined > +#endif > +#ifndef __LZCNT__ > +# error __LZCNT__ not defined > +#endif > +#ifndef __MOVBE__ > +# error __MOVBE__ not defined > +#endif > +#ifndef __XSAVE__ > +# error __XSAVE__ not defined > +#endif > +#ifdef __XSAVEC__ > +# error __XSAVEC__ defined > +#endif > +#ifndef __AVX512F__ > +# error __AVX512F__ not defined > +#endif > +#ifndef __AVX512BW__ > +# error __AVX512BW__ not defined > 
+#endif > +#ifndef __AVX512CD__ > +# error __AVX512CD__ not defined > +#endif > +#ifndef __AVX512DQ__ > +# error __AVX512DQ__ not defined > +#endif > +#ifndef __AVX512VL__ > +# error __AVX512VL__ not defined > +#endif > +#ifdef __AVX512PF__ > +# error __AVX512PF__ defined > +#endif > +#ifdef __AVX512VBMI__ > +# error __AVX512VBMI__ defined > +#endif > +#ifdef __AVX512IFMA__ > +# error __AVX512IFMA__ defined > +#endif > +#ifdef __AVX512VNNIW__ > +# error __AVX512VNNIW__ defined > +#endif > +#ifdef __AVX512VBMI2__ > +# error __AVX512VBMI2__ defined > +#endif > +#ifdef __AVX5124FMAPS__ > +# error __AVX5124FMAPS__ defined > +#endif > +#ifdef __AVX5124BITALG__ > +# error __AVX5124BITALG__ defined > +#endif > +#ifdef __AVX5124VPOPCNTDQ__ > +# error __AVX5124VPOPCNTDQ__ defined > +#endif > +#ifdef __AVX5124BF16__ > +# error __AVX5124BF16__ defined > +#endif > +#ifdef __AVX512VP2INTERSECT__ > +# error __AVX512VP2INTERSECT__ defined > +#endif > +#ifdef __AVX512VNNI__ > +# error __AVX512VNNI__ defined > +#endif > +#ifdef __FMA4__ > +# error __FMA4__ defined > +#endif > +#ifdef __3dNOW__ > +# error __3dNOW__ defined > +#endif > +#ifdef __tune_k8__ > +# error __tune_k8__ defined > +#endif