This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG012dd42e027e: [X86] Support -march=x86-64-v[234] (authored by MaskRay).
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D89197/new/ https://reviews.llvm.org/D89197 Files: clang/docs/ReleaseNotes.rst clang/docs/UsersManual.rst clang/lib/Basic/Targets/X86.cpp clang/lib/Basic/Targets/X86.h clang/test/CodeGen/attr-target-x86.c clang/test/Driver/x86-march.c clang/test/Driver/x86-mtune.c clang/test/Misc/target-invalid-cpu-note.c clang/test/Preprocessor/predefined-arch-macros-x86.c clang/test/Preprocessor/predefined-arch-macros.c clang/test/Sema/builtin-cpu-supports.c llvm/docs/ReleaseNotes.rst llvm/include/llvm/Support/X86TargetParser.h llvm/lib/Support/X86TargetParser.cpp llvm/lib/Target/X86/X86.td llvm/test/CodeGen/X86/cpus-other.ll
Index: llvm/test/CodeGen/X86/cpus-other.ll =================================================================== --- llvm/test/CodeGen/X86/cpus-other.ll +++ llvm/test/CodeGen/X86/cpus-other.ll @@ -16,6 +16,11 @@ ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +;; x86-64 micro-architecture levels. +; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v2 +; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v3 +; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v4 + define void @foo() { ret void } Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -558,18 +558,27 @@ //===----------------------------------------------------------------------===// def ProcessorFeatures { + // x86-64 and x86-64-v[234] + list<SubtargetFeature> X86_64V1Features = [ + FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2, + FeatureFXSR, FeatureNOPL, Feature64Bit + ]; + list<SubtargetFeature> X86_64V2Features = !listconcat( + X86_64V1Features, + [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]); + list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [ + FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT, + FeatureMOVBE, FeatureXSAVE + ]); + list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [ + FeatureBWI, + FeatureCDI, + FeatureDQI, + FeatureVLX, + ]); + // Nehalem - list<SubtargetFeature> NHMFeatures = [FeatureX87, - FeatureCMPXCHG8B, - FeatureCMOV, - FeatureMMX, - FeatureSSE42, - FeatureFXSR, - FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, - FeaturePOPCNT, - FeatureLAHFSAHF]; + list<SubtargetFeature> NHMFeatures = X86_64V2Features; 
list<SubtargetFeature> NHMTuning = [FeatureMacroFusion, FeatureInsertVZEROUPPER]; @@ -1350,16 +1359,7 @@ // covers a huge swath of x86 processors. If there are specific scheduling // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. -def : ProcModel<"x86-64", SandyBridgeModel, [ - FeatureX87, - FeatureCMPXCHG8B, - FeatureCMOV, - FeatureMMX, - FeatureSSE2, - FeatureFXSR, - FeatureNOPL, - Feature64Bit, -], +def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features, [ FeatureSlow3OpsLEA, FeatureSlowDivide64, @@ -1368,6 +1368,16 @@ FeatureInsertVZEROUPPER ]>; +// x86-64 micro-architecture levels. +def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features, + ProcessorFeatures.SNBTuning>; +// Close to Haswell. +def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features, + ProcessorFeatures.HSWTuning>; +// Close to the AVX-512 level implemented by Xeon Scalable Processors. +def : ProcModel<"x86-64-v4", HaswellModel, ProcessorFeatures.X86_64V4Features, + ProcessorFeatures.SKXTuning>; + //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// Index: llvm/lib/Support/X86TargetParser.cpp =================================================================== --- llvm/lib/Support/X86TargetParser.cpp +++ llvm/lib/Support/X86TargetParser.cpp @@ -137,6 +137,15 @@ // Basic 64-bit capable CPU. 
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT; +constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF | + FeaturePOPCNT | FeatureSSE4_2 | + FeatureCMPXCHG16B; +constexpr FeatureBitset FeaturesX86_64_V3 = + FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C | + FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE; +constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | + FeatureAVX512BW | FeatureAVX512CD | + FeatureAVX512DQ | FeatureAVX512VL; // Intel Core CPUs constexpr FeatureBitset FeaturesCore2 = @@ -383,10 +392,15 @@ { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 }, // Generic 64-bit processor. { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 }, + { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 }, + { {"x86-64-v3"}, CK_x86_64_v3, ~0U, FeaturesX86_64_V3 }, + { {"x86-64-v4"}, CK_x86_64_v4, ~0U, FeaturesX86_64_V4 }, // Geode processors. { {"geode"}, CK_Geode, ~0U, FeaturesGeode }, }; +constexpr const char *NoTuneList[] = {"x86-64-v2", "x86-64-v3", "x86-64-v4"}; + X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) { for (const auto &P : Processors) if (P.Name == CPU && (P.Features[FEATURE_64BIT] || !Only64Bit)) @@ -395,6 +409,12 @@ return CK_None; } +X86::CPUKind llvm::X86::parseTuneCPU(StringRef CPU, bool Only64Bit) { + if (llvm::is_contained(NoTuneList, CPU)) + return CK_None; + return parseArchX86(CPU, Only64Bit); +} + void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool Only64Bit) { for (const auto &P : Processors) @@ -402,6 +422,14 @@ Values.emplace_back(P.Name); } +void llvm::X86::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values, + bool Only64Bit) { + for (const ProcInfo &P : Processors) + if (!P.Name.empty() && (P.Features[FEATURE_64BIT] || !Only64Bit) && + !llvm::is_contained(NoTuneList, P.Name)) + Values.emplace_back(P.Name); +} + ProcessorFeatures llvm::X86::getKeyFeature(X86::CPUKind Kind) { // FIXME: Can we avoid a 
linear search here? The table might be sorted by // CPUKind so we could binary search? Index: llvm/include/llvm/Support/X86TargetParser.h =================================================================== --- llvm/include/llvm/Support/X86TargetParser.h +++ llvm/include/llvm/Support/X86TargetParser.h @@ -121,17 +121,24 @@ CK_ZNVER1, CK_ZNVER2, CK_x86_64, + CK_x86_64_v2, + CK_x86_64_v3, + CK_x86_64_v4, CK_Geode, }; /// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if /// \p Only64Bit is true. CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false); +CPUKind parseTuneCPU(StringRef CPU, bool Only64Bit = false); /// Provide a list of valid CPU names. If \p Only64Bit is true, the list will /// only contain 64-bit capable CPUs. void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool Only64Bit = false); +/// Provide a list of valid -mtune names. +void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values, + bool Only64Bit = false); /// Get the key feature prioritizing target multiversioning. ProcessorFeatures getKeyFeature(CPUKind Kind); Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -105,7 +105,7 @@ * The 'mpx' feature was removed from the backend. It had been removed from clang frontend in 10.0. Mention of the 'mpx' feature in an IR file will print a message to stderr, but IR should still compile. -* Support for -march=sapphirerapids was added. +* Support for ``-march=sapphirerapids`` and ``-march=x86-64-v[234]`` has been added. * The assembler now has support for {disp32} and {disp8} pseudo prefixes for controlling displacement size for memory operands and jump displacements. The assembler also supports the .d32 and .d8 mnemonic suffixes to do the same. 
Index: clang/test/Sema/builtin-cpu-supports.c =================================================================== --- clang/test/Sema/builtin-cpu-supports.c +++ clang/test/Sema/builtin-cpu-supports.c @@ -15,6 +15,11 @@ if (__builtin_cpu_is("int")) // expected-error {{invalid cpu name for builtin}} a("intel"); + + (void)__builtin_cpu_is("x86-64"); // expected-error {{invalid cpu name for builtin}} + (void)__builtin_cpu_is("x86-64-v2"); // expected-error {{invalid cpu name for builtin}} + (void)__builtin_cpu_is("x86-64-v3"); // expected-error {{invalid cpu name for builtin}} + (void)__builtin_cpu_is("x86-64-v4"); // expected-error {{invalid cpu name for builtin}} #else if (__builtin_cpu_supports("vsx")) // expected-error {{use of unknown builtin}} a("vsx"); Index: clang/test/Preprocessor/predefined-arch-macros.c =================================================================== --- clang/test/Preprocessor/predefined-arch-macros.c +++ clang/test/Preprocessor/predefined-arch-macros.c @@ -2263,21 +2263,6 @@ // CHECK_X86_64_M32: #define __k8__ 1 // CHECK_X86_64_M32: #define i386 1 -// RUN: %clang -march=x86-64 -m64 -E -dM %s -o - 2>&1 \ -// RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_X86_64_M64 -// CHECK_X86_64_M64: #define __MMX__ 1 -// CHECK_X86_64_M64: #define __SSE2_MATH__ 1 -// CHECK_X86_64_M64: #define __SSE2__ 1 -// CHECK_X86_64_M64: #define __SSE_MATH__ 1 -// CHECK_X86_64_M64: #define __SSE__ 1 -// CHECK_X86_64_M64: #define __amd64 1 -// CHECK_X86_64_M64: #define __amd64__ 1 -// CHECK_X86_64_M64: #define __k8 1 -// CHECK_X86_64_M64: #define __k8__ 1 -// CHECK_X86_64_M64: #define __x86_64 1 -// CHECK_X86_64_M64: #define __x86_64__ 1 - // RUN: %clang -march=k8 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M32 Index: clang/test/Preprocessor/predefined-arch-macros-x86.c 
=================================================================== --- /dev/null +++ clang/test/Preprocessor/predefined-arch-macros-x86.c @@ -0,0 +1,54 @@ +// RUN: %clang -target x86_64 -march=x86-64 -E -dM %s > %tv1 +// RUN: FileCheck %s --check-prefix=X86_64_V1 < %tv1 + +// X86_64_V1: #define __MMX__ 1 +// X86_64_V1: #define __SSE2_MATH__ 1 +// X86_64_V1: #define __SSE2__ 1 +// X86_64_V1: #define __SSE_MATH__ 1 +// X86_64_V1: #define __SSE__ 1 +// X86_64_V1: #define __amd64 1 +// X86_64_V1: #define __amd64__ 1 +// X86_64_V1: #define __k8 1 +// X86_64_V1: #define __k8__ 1 +// X86_64_V1: #define __x86_64 1 +// X86_64_V1: #define __x86_64__ 1 + +// RUN: %clang -target x86_64 -march=x86-64-v2 -E -dM %s > %tv2 +// RUN: diff %tv1 %tv2 > %t.txt || true +// RUN: FileCheck %s --check-prefix=X86_64_V2 < %t.txt + +/// v2 is close to Nehalem. +// X86_64_V2: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1 +// X86_64_V2: #define __LAHF_SAHF__ 1 +// X86_64_V2: #define __POPCNT__ 1 +// X86_64_V2: #define __SSE3__ 1 +// X86_64_V2-NEXT: #define __SSE4_1__ 1 +// X86_64_V2-NEXT: #define __SSE4_2__ 1 +// X86_64_V2: #define __SSSE3__ 1 + +/// v3 is close to Haswell. +// RUN: %clang -target x86_64 -march=x86-64-v3 -E -dM %s > %tv3 +// RUN: diff %tv2 %tv3 > %t.txt || true +// RUN: FileCheck %s --check-prefix=X86_64_V3 < %t.txt + +// X86_64_V3: #define __AVX2__ 1 +// X86_64_V3-NEXT: #define __AVX__ 1 +// X86_64_V3: #define __BMI2__ 1 +// X86_64_V3-NEXT: #define __BMI__ 1 +// X86_64_V3: #define __F16C__ 1 +// X86_64_V3: #define __FMA__ 1 +// X86_64_V3: #define __LZCNT__ 1 +// X86_64_V3: #define __MOVBE__ 1 +// X86_64_V3: #define __XSAVE__ 1 + +/// v4 is close to the AVX-512 level implemented by Xeon Scalable Processors. 
+// RUN: %clang -target x86_64 -march=x86-64-v4 -E -dM %s > %tv4 +// RUN: diff %tv3 %tv4 > %t.txt || true +// RUN: FileCheck %s --check-prefix=X86_64_V4 < %t.txt + +// X86_64_V4: #define __AVX512BW__ 1 +// X86_64_V4-NEXT: #define __AVX512CD__ 1 +// X86_64_V4-NEXT: #define __AVX512DQ__ 1 +// X86_64_V4-NEXT: #define __AVX512F__ 1 +// X86_64_V4-NEXT: #define __AVX512VL__ 1 +// X86_64_V4-NOT: #define __AVX512{{.*}} Index: clang/test/Misc/target-invalid-cpu-note.c =================================================================== --- clang/test/Misc/target-invalid-cpu-note.c +++ clang/test/Misc/target-invalid-cpu-note.c @@ -25,7 +25,7 @@ // X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, // X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, // X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, -// X86-SAME: x86-64, geode +// X86-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 // X86_64: error: unknown target CPU 'not-a-cpu' @@ -35,7 +35,8 @@ // X86_64-SAME: core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, // X86_64-SAME: icelake-client, icelake-server, tigerlake, sapphirerapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, // X86_64-SAME: athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, -// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, x86-64 +// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, +// X86_64-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 // TUNE_X86: error: unknown target CPU 'not-a-cpu' @@ -49,7 +50,7 @@ // TUNE_X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, // TUNE_X86-SAME: 
athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, // TUNE_X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, -// TUNE_X86-SAME: x86-64, geode +// TUNE_X86-SAME: x86-64, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 // TUNE_X86_64: error: unknown target CPU 'not-a-cpu' @@ -63,7 +64,7 @@ // TUNE_X86_64-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, // TUNE_X86_64-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, // TUNE_X86_64-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, -// TUNE_X86_64-SAME: x86-64, geode +// TUNE_X86_64-SAME: x86-64, geode{{$}} // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' Index: clang/test/Driver/x86-mtune.c =================================================================== --- clang/test/Driver/x86-mtune.c +++ clang/test/Driver/x86-mtune.c @@ -40,3 +40,8 @@ // RUN: | FileCheck %s -check-prefix=marchmtune // marchmtune: "-target-cpu" "core2" // mmarchmtune: "-tune-cpu" "nehalem" + +// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v2 2>&1 | FileCheck %s --check-prefix=INVALID +// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v3 2>&1 | FileCheck %s --check-prefix=INVALID +// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v4 2>&1 | FileCheck %s --check-prefix=INVALID +// INVALID: error: unknown target CPU '{{.*}}' Index: clang/test/Driver/x86-march.c =================================================================== --- clang/test/Driver/x86-march.c +++ clang/test/Driver/x86-march.c @@ -175,3 +175,12 @@ // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver2 2>&1 \ // RUN: | FileCheck %s -check-prefix=znver2 // znver2: "-target-cpu" "znver2" + +// RUN: %clang -target x86_64 
-c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64 +// x86-64: "-target-cpu" "x86-64" +// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v2 2>&1 | FileCheck %s --check-prefix=x86-64-v2 +// x86-64-v2: "-target-cpu" "x86-64-v2" +// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v3 2>&1 | FileCheck %s --check-prefix=x86-64-v3 +// x86-64-v3: "-target-cpu" "x86-64-v3" +// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v4 2>&1 | FileCheck %s --check-prefix=x86-64-v4 +// x86-64-v4: "-target-cpu" "x86-64-v4" Index: clang/test/CodeGen/attr-target-x86.c =================================================================== --- clang/test/CodeGen/attr-target-x86.c +++ clang/test/CodeGen/attr-target-x86.c @@ -32,6 +32,10 @@ int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; } +void __attribute__((target("arch=x86-64-v2"))) x86_64_v2() {} +void __attribute__((target("arch=x86-64-v3"))) x86_64_v3() {} +void __attribute__((target("arch=x86-64-v4"))) x86_64_v4() {} + // Check that we emit the additional subtarget and cpu features for foo and not for baz or bar. 
// CHECK: baz{{.*}} #0 // CHECK: foo{{.*}} #1 @@ -59,3 +63,10 @@ // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" // CHECK-NOT: tune-cpu // CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge" + +// CHECK: "target-cpu"="x86-64-v2" +// CHECK-SAME: "target-features"="+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" +// CHECK: "target-cpu"="x86-64-v3" +// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: "target-cpu"="x86-64-v4" +// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -314,7 +314,7 @@ // Allow 32-bit only CPUs regardless of 64-bit mode unlike isValidCPUName. // NOTE: gcc rejects 32-bit mtune CPUs in 64-bit mode. But being lenient // since mtune was ignored by clang for so long. 
- return llvm::X86::parseArchX86(Name) != llvm::X86::CK_None; + return llvm::X86::parseTuneCPU(Name) != llvm::X86::CK_None; } void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -506,6 +506,9 @@ case CK_K8: case CK_K8SSE3: case CK_x86_64: + case CK_x86_64_v2: + case CK_x86_64_v3: + case CK_x86_64_v4: defineCPUMacros(Builder, "k8"); break; case CK_AMDFAM10: @@ -1312,6 +1315,9 @@ case CK_ZNVER2: // Deprecated case CK_x86_64: + case CK_x86_64_v2: + case CK_x86_64_v3: + case CK_x86_64_v4: case CK_Yonah: case CK_Penryn: case CK_Core2: @@ -1456,7 +1462,7 @@ } void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const { - llvm::X86::fillValidCPUArchList(Values); + llvm::X86::fillValidTuneCPUList(Values); } ArrayRef<const char *> X86TargetInfo::getGCCRegNames() const { Index: clang/docs/UsersManual.rst =================================================================== --- clang/docs/UsersManual.rst +++ clang/docs/UsersManual.rst @@ -3201,6 +3201,15 @@ appropriate for a CPU running in 16-bit mode, with address-size and operand-size prefixes to enable 32-bit addressing and operations. +Several micro-architecture levels as specified by the x86-64 psABI are defined. +They are cumulative in the sense that features from previous levels are +implicitly included in later levels. 
+ +- ``-march=x86-64``: CMOV, CMPXCHG8B, FPU, FXSR, MMX, OSFXSR, SCE, SSE, SSE2 +- ``-march=x86-64-v2``: (close to Nehalem) CMPXCHG16B, LAHF-SAHF, POPCNT, SSE3, SSE4.1, SSE4.2, SSSE3 +- ``-march=x86-64-v3``: (close to Haswell) AVX, AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE, XSAVE +- ``-march=x86-64-v4``: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL + ARM ^^^ Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -188,7 +188,10 @@ - The x86 intrinsics ``__rorb``, ``__rorw``, ``__rord``, ``__rorq`, ``_rotr``, ``_rotwr`` and ``_lrotr`` may now be used within constant expressions. -- Support for -march=sapphirerapids was added. +- Support for ``-march=sapphirerapids`` was added. + +- Support for ``-march=x86-64-v[234]`` has been added. + See :doc:`UsersManual` for details about these micro-architecture levels. - The -mtune command line option is no longer ignored for X86. This can be used to request microarchitectural optimizations independent on -march. -march=<cpu>
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits