GGanesh updated this revision to Diff 483092. GGanesh added a comment. Updated to use the 'no' processor model. Fixed the following tests: tools/llvm-mca/X86/cpus.s, tools/llvm-mca/X86/read-after-ld-1.s, tools/llvm-mca/X86/register-file-statistics.s, tools/llvm-mca/X86/scheduler-queue-usage.s.
Addressed comments from @RKSimon. Repository: rG LLVM Github Monorepo. CHANGES SINCE LAST ACTION: https://reviews.llvm.org/D139073/new/ https://reviews.llvm.org/D139073 Files: clang/lib/Basic/Targets/X86.cpp clang/test/CodeGen/target-builtin-noerror.c clang/test/Driver/x86-march.c clang/test/Frontend/x86-target-cpu.c clang/test/Misc/target-invalid-cpu-note.c clang/test/Preprocessor/predefined-arch-macros.c compiler-rt/lib/builtins/cpu_model.c llvm/include/llvm/Support/X86TargetParser.h llvm/lib/Support/Host.cpp llvm/lib/Support/X86TargetParser.cpp llvm/lib/Target/X86/X86.td llvm/lib/Target/X86/X86PfmCounters.td llvm/test/CodeGen/X86/cpus-amd.ll llvm/test/CodeGen/X86/rdpru.ll llvm/test/CodeGen/X86/slow-unaligned-mem.ll llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll llvm/test/MC/X86/x86_long_nop.s llvm/test/tools/llvm-mca/X86/cpus.s llvm/test/tools/llvm-mca/X86/read-after-ld-1.s llvm/test/tools/llvm-mca/X86/register-file-statistics.s llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
Index: llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s =================================================================== --- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -112,6 +112,12 @@ # ZNVER3-NEXT: [3] Maximum number of used buffer entries. # ZNVER3-NEXT: [4] Total number of buffer entries. +# ZNVER4: Scheduler's queue usage: +# ZNVER4-NEXT: [1] Resource name. +# ZNVER4-NEXT: [2] Average number of used buffer entries. +# ZNVER4-NEXT: [3] Maximum number of used buffer entries. +# ZNVER4-NEXT: [4] Total number of buffer entries. + # BARCELONA: [1] [2] [3] [4] # BARCELONA-NEXT: SBPortAny 0 1 54 @@ -165,3 +171,9 @@ # ZNVER3-NEXT: Zn3Int 0 1 96 # ZNVER3-NEXT: Zn3Load 0 0 72 # ZNVER3-NEXT: Zn3Store 0 0 64 + +# ZNVER4: [1] [2] [3] [4] +# ZNVER4-NEXT: Zn3FP 0 0 64 +# ZNVER4-NEXT: Zn3Int 0 1 96 +# ZNVER4-NEXT: Zn3Load 0 0 72 +# ZNVER4-NEXT: Zn3Store 0 0 64 Index: llvm/test/tools/llvm-mca/X86/register-file-statistics.s =================================================================== --- llvm/test/tools/llvm-mca/X86/register-file-statistics.s +++ llvm/test/tools/llvm-mca/X86/register-file-statistics.s @@ -54,6 +54,11 @@ # ZNVER3-NEXT: Total number of mappings created: 0 # ZNVER3-NEXT: Max number of mappings used: 0 +# ZNVER4: * Register File #1 -- Zn3FpPRF: +# ZNVER4-NEXT: Number of physical registers: 160 +# ZNVER4-NEXT: Total number of mappings created: 0 +# ZNVER4-NEXT: Max number of mappings used: 0 + # BDVER2: * Register File #2 -- PdIntegerPRF: # BDVER2-NEXT: Number of physical registers: 96 # BDVER2-NEXT: Total number of mappings created: 2 @@ -78,3 +83,8 @@ # ZNVER3-NEXT: Number of physical registers: 192 # ZNVER3-NEXT: Total number of mappings created: 2 # ZNVER3-NEXT: Max number of mappings used: 2 + +# ZNVER4: * Register File #2 -- Zn3IntegerPRF: +# ZNVER4-NEXT: Number of physical registers: 192 +# ZNVER4-NEXT: Total number of mappings created: 2 +# ZNVER4-NEXT: Max number of 
mappings used: 2 Index: llvm/test/tools/llvm-mca/X86/read-after-ld-1.s =================================================================== --- llvm/test/tools/llvm-mca/X86/read-after-ld-1.s +++ llvm/test/tools/llvm-mca/X86/read-after-ld-1.s @@ -47,6 +47,9 @@ # ZNVER3-NEXT: Total Cycles: 17 # ZNVER3-NEXT: Total uOps: 2 +# ZNVER4-NEXT: Total Cycles: 17 +# ZNVER4-NEXT: Total uOps: 2 + # BARCELONA: Dispatch Width: 4 # BARCELONA-NEXT: uOps Per Cycle: 0.15 # BARCELONA-NEXT: IPC: 0.10 @@ -97,6 +100,11 @@ # ZNVER3-NEXT: IPC: 0.12 # ZNVER3-NEXT: Block RThroughput: 3.0 +# ZNVER4: Dispatch Width: 6 +# ZNVER4-NEXT: uOps Per Cycle: 0.12 +# ZNVER4-NEXT: IPC: 0.12 +# ZNVER4-NEXT: Block RThroughput: 3.0 + # ALL: Timeline view: # BARCELONA-NEXT: 0123456789 @@ -129,6 +137,9 @@ # ZNVER3-NEXT: 0123456 # ZNVER3-NEXT: Index 0123456789 +# ZNVER4-NEXT: 0123456 +# ZNVER4-NEXT: Index 0123456789 + # BARCELONA: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # BARCELONA-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 @@ -159,6 +170,9 @@ # ZNVER3: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 # ZNVER3-NEXT: [0,1] D====eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# ZNVER4: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 +# ZNVER4-NEXT: [0,1] D====eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -197,3 +211,6 @@ # ZNVER3-NEXT: 1. 1 5.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 # ZNVER3-NEXT: 1 3.0 0.5 0.0 <total> + +# ZNVER4-NEXT: 1. 
1 5.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# ZNVER4-NEXT: 1 3.0 0.5 0.0 <total> Index: llvm/test/tools/llvm-mca/X86/cpus.s =================================================================== --- llvm/test/tools/llvm-mca/X86/cpus.s +++ llvm/test/tools/llvm-mca/X86/cpus.s @@ -99,3 +99,8 @@ # ZNVER3-NEXT: uOps Per Cycle: 0.97 # ZNVER3-NEXT: IPC: 0.97 # ZNVER3-NEXT: Block RThroughput: 0.3 + +# ZNVER4: Dispatch Width: 6 +# ZNVER4-NEXT: uOps Per Cycle: 0.97 +# ZNVER4-NEXT: IPC: 0.97 +# ZNVER4-NEXT: Block RThroughput: 0.3 Index: llvm/test/MC/X86/x86_long_nop.s =================================================================== --- llvm/test/MC/X86/x86_long_nop.s +++ llvm/test/MC/X86/x86_long_nop.s @@ -17,6 +17,8 @@ # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s Index: 
llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll =================================================================== --- llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll +++ llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -15,6 +15,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s ; Verify that for the X86_64 processors that are known to have poor latency ; double precision shift instructions we do not generate 'shld' or 'shrd' Index: llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll =================================================================== --- llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll +++ llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll @@ -7,6 +7,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver1 | FileCheck %s --check-prefixes=FAST ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST Index: llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll =================================================================== --- llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll +++ llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR +; RUN: llc < 
%s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X86-64 define float @f32_no_daz(float %f) #0 { Index: llvm/test/CodeGen/X86/slow-unaligned-mem.ll =================================================================== --- llvm/test/CodeGen/X86/slow-unaligned-mem.ll +++ llvm/test/CodeGen/X86/slow-unaligned-mem.ll @@ -49,6 +49,7 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=FAST ; Other chips with slow unaligned memory accesses Index: llvm/test/CodeGen/X86/rdpru.ll =================================================================== --- llvm/test/CodeGen/X86/rdpru.ll +++ llvm/test/CodeGen/X86/rdpru.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mattr=+rdpru -fast-isel | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 -fast-isel | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 -fast-isel | FileCheck %s --check-prefix=X64 define void @rdpru_asm() { ; X86-LABEL: rdpru_asm: Index: llvm/test/CodeGen/X86/cpus-amd.ll =================================================================== --- llvm/test/CodeGen/X86/cpus-amd.ll +++ llvm/test/CodeGen/X86/cpus-amd.ll @@ -28,6 +28,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o 
/dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty define void @foo() { ret void Index: llvm/lib/Target/X86/X86PfmCounters.td =================================================================== --- llvm/lib/Target/X86/X86PfmCounters.td +++ llvm/lib/Target/X86/X86PfmCounters.td @@ -289,3 +289,4 @@ ]; } def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>; +def : PfmCountersBinding<"znver4", ZnVer3PfmCounters>; Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -1295,6 +1295,23 @@ !listconcat(ZN2Tuning, ZN3AdditionalTuning); list<SubtargetFeature> ZN3Features = !listconcat(ZN2Features, ZN3AdditionalFeatures); + list<SubtargetFeature> ZN4Tuning = ZN3Tuning; + list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512, + FeatureCDI, + FeatureDQI, + FeatureBWI, + FeatureVLX, + FeatureVBMI, + FeatureVBMI2, + FeatureIFMA, + FeatureVNNI, + FeatureBITALG, + FeatureGFNI, + FeatureBF16, + FeatureSHSTK, + FeatureVPOPCNTDQ]; + list<SubtargetFeature> ZN4Features = + !listconcat(ZN3Features, ZN4AdditionalFeatures); } //===----------------------------------------------------------------------===// @@ -1599,6 +1616,8 @@ ProcessorFeatures.ZN2Tuning>; def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features, ProcessorFeatures.ZN3Tuning>; +def : Proc<"znver4",ProcessorFeatures.ZN4Features, + ProcessorFeatures.ZN4Tuning>; def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; Index: llvm/lib/Support/X86TargetParser.cpp =================================================================== --- llvm/lib/Support/X86TargetParser.cpp +++ llvm/lib/Support/X86TargetParser.cpp @@ -298,6 +298,12 @@ static 
constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 | FeatureINVPCID | FeaturePKU | FeatureVAES | FeatureVPCLMULQDQ; +static constexpr FeatureBitset FeaturesZNVER4 = + FeaturesZNVER3 | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ | + FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI | + FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512BITALG | + FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI | + FeatureSHSTK; constexpr ProcInfo Processors[] = { // Empty processor. Include X87 and CMPXCHG8 for backwards compatibility. @@ -425,6 +431,7 @@ { {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1 }, { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 }, { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3 }, + { {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4 }, // Generic 64-bit processor. { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 }, { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 }, Index: llvm/lib/Support/Host.cpp =================================================================== --- llvm/lib/Support/Host.cpp +++ llvm/lib/Support/Host.cpp @@ -1105,7 +1105,15 @@ *Subtype = X86::AMDFAM19H_ZNVER3; break; } - break; + if ((Model >= 0x10 && Model <= 0x1f) || + (Model >= 0x60 && Model <= 0x74) || + (Model >= 0x78 && Model <= 0x7b) || + (Model >= 0xA0 && Model <= 0xAf)) { + CPU = "znver4"; + *Subtype = X86::AMDFAM19H_ZNVER4; + break; // "znver4" + } + break; // family 19h default: break; // Unknown AMD CPU. 
} Index: llvm/include/llvm/Support/X86TargetParser.h =================================================================== --- llvm/include/llvm/Support/X86TargetParser.h +++ llvm/include/llvm/Support/X86TargetParser.h @@ -129,6 +129,7 @@ CK_ZNVER1, CK_ZNVER2, CK_ZNVER3, + CK_ZNVER4, CK_x86_64, CK_x86_64_v2, CK_x86_64_v3, Index: compiler-rt/lib/builtins/cpu_model.c =================================================================== --- compiler-rt/lib/builtins/cpu_model.c +++ compiler-rt/lib/builtins/cpu_model.c @@ -627,6 +627,14 @@ *Subtype = AMDFAM19H_ZNVER3; break; } + if ((Model >= 0x10 && Model <= 0x1f) || + (Model >= 0x60 && Model <= 0x74) || + (Model >= 0x78 && Model <= 0x7b) || + (Model >= 0xA0 && Model <= 0xAf)) { + CPU = "znver4"; + *Subtype = AMDFAM19H_ZNVER4; + break; // "znver4" + } break; default: break; // Unknown AMD CPU. Index: clang/test/Preprocessor/predefined-arch-macros.c =================================================================== --- clang/test/Preprocessor/predefined-arch-macros.c +++ clang/test/Preprocessor/predefined-arch-macros.c @@ -3832,6 +3832,138 @@ // CHECK_ZNVER3_M64: #define __znver3 1 // CHECK_ZNVER3_M64: #define __znver3__ 1 +// RUN: %clang -march=znver4 -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M32 +// CHECK_ZNVER4_M32-NOT: #define __3dNOW_A__ 1 +// CHECK_ZNVER4_M32-NOT: #define __3dNOW__ 1 +// CHECK_ZNVER4_M32: #define __ADX__ 1 +// CHECK_ZNVER4_M32: #define __AES__ 1 +// CHECK_ZNVER4_M32: #define __AVX2__ 1 +// CHECK_ZNVER4_M32: #define __AVX512BF16__ 1 +// CHECK_ZNVER4_M32: #define __AVX512BITALG__ 1 +// CHECK_ZNVER4_M32: #define __AVX512BW__ 1 +// CHECK_ZNVER4_M32: #define __AVX512CD__ 1 +// CHECK_ZNVER4_M32: #define __AVX512DQ__ 1 +// CHECK_ZNVER4_M32: #define __AVX512F__ 1 +// CHECK_ZNVER4_M32: #define __AVX512IFMA__ 1 +// CHECK_ZNVER4_M32: #define __AVX512VBMI2__ 1 +// CHECK_ZNVER4_M32: #define __AVX512VBMI__ 1 +// 
CHECK_ZNVER4_M32: #define __AVX512VL__ 1 +// CHECK_ZNVER4_M32: #define __AVX512VNNI__ 1 +// CHECK_ZNVER4_M32: #define __AVX512VPOPCNTDQ__ 1 +// CHECK_ZNVER4_M32: #define __AVX__ 1 +// CHECK_ZNVER4_M32: #define __BMI2__ 1 +// CHECK_ZNVER4_M32: #define __BMI__ 1 +// CHECK_ZNVER4_M32: #define __CLFLUSHOPT__ 1 +// CHECK_ZNVER4_M32: #define __CLWB__ 1 +// CHECK_ZNVER4_M32: #define __CLZERO__ 1 +// CHECK_ZNVER4_M32: #define __F16C__ 1 +// CHECK_ZNVER4_M32-NOT: #define __FMA4__ 1 +// CHECK_ZNVER4_M32: #define __FMA__ 1 +// CHECK_ZNVER4_M32: #define __FSGSBASE__ 1 +// CHECK_ZNVER4_M32: #define __GFNI__ 1 +// CHECK_ZNVER4_M32: #define __LZCNT__ 1 +// CHECK_ZNVER4_M32: #define __MMX__ 1 +// CHECK_ZNVER4_M32: #define __PCLMUL__ 1 +// CHECK_ZNVER4_M32: #define __PKU__ 1 +// CHECK_ZNVER4_M32: #define __POPCNT__ 1 +// CHECK_ZNVER4_M32: #define __PRFCHW__ 1 +// CHECK_ZNVER4_M32: #define __RDPID__ 1 +// CHECK_ZNVER4_M32: #define __RDPRU__ 1 +// CHECK_ZNVER4_M32: #define __RDRND__ 1 +// CHECK_ZNVER4_M32: #define __RDSEED__ 1 +// CHECK_ZNVER4_M32: #define __SHA__ 1 +// CHECK_ZNVER4_M32: #define __SSE2_MATH__ 1 +// CHECK_ZNVER4_M32: #define __SSE2__ 1 +// CHECK_ZNVER4_M32: #define __SSE3__ 1 +// CHECK_ZNVER4_M32: #define __SSE4A__ 1 +// CHECK_ZNVER4_M32: #define __SSE4_1__ 1 +// CHECK_ZNVER4_M32: #define __SSE4_2__ 1 +// CHECK_ZNVER4_M32: #define __SSE_MATH__ 1 +// CHECK_ZNVER4_M32: #define __SSE__ 1 +// CHECK_ZNVER4_M32: #define __SSSE3__ 1 +// CHECK_ZNVER4_M32-NOT: #define __TBM__ 1 +// CHECK_ZNVER4_M32: #define __WBNOINVD__ 1 +// CHECK_ZNVER4_M32-NOT: #define __XOP__ 1 +// CHECK_ZNVER4_M32: #define __XSAVEC__ 1 +// CHECK_ZNVER4_M32: #define __XSAVEOPT__ 1 +// CHECK_ZNVER4_M32: #define __XSAVES__ 1 +// CHECK_ZNVER4_M32: #define __XSAVE__ 1 +// CHECK_ZNVER4_M32: #define __i386 1 +// CHECK_ZNVER4_M32: #define __i386__ 1 +// CHECK_ZNVER4_M32: #define __tune_znver4__ 1 +// CHECK_ZNVER4_M32: #define __znver4 1 +// CHECK_ZNVER4_M32: #define __znver4__ 1 + +// RUN: %clang -march=znver4 
-m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M64 +// CHECK_ZNVER4_M64-NOT: #define __3dNOW_A__ 1 +// CHECK_ZNVER4_M64-NOT: #define __3dNOW__ 1 +// CHECK_ZNVER4_M64: #define __ADX__ 1 +// CHECK_ZNVER4_M64: #define __AES__ 1 +// CHECK_ZNVER4_M64: #define __AVX2__ 1 +// CHECK_ZNVER4_M64: #define __AVX512BF16__ 1 +// CHECK_ZNVER4_M64: #define __AVX512BITALG__ 1 +// CHECK_ZNVER4_M64: #define __AVX512BW__ 1 +// CHECK_ZNVER4_M64: #define __AVX512CD__ 1 +// CHECK_ZNVER4_M64: #define __AVX512DQ__ 1 +// CHECK_ZNVER4_M64: #define __AVX512F__ 1 +// CHECK_ZNVER4_M64: #define __AVX512IFMA__ 1 +// CHECK_ZNVER4_M64: #define __AVX512VBMI2__ 1 +// CHECK_ZNVER4_M64: #define __AVX512VBMI__ 1 +// CHECK_ZNVER4_M64: #define __AVX512VL__ 1 +// CHECK_ZNVER4_M64: #define __AVX512VNNI__ 1 +// CHECK_ZNVER4_M64: #define __AVX512VPOPCNTDQ__ 1 +// CHECK_ZNVER4_M64: #define __AVX__ 1 +// CHECK_ZNVER4_M64: #define __BMI2__ 1 +// CHECK_ZNVER4_M64: #define __BMI__ 1 +// CHECK_ZNVER4_M64: #define __CLFLUSHOPT__ 1 +// CHECK_ZNVER4_M64: #define __CLWB__ 1 +// CHECK_ZNVER4_M64: #define __CLZERO__ 1 +// CHECK_ZNVER4_M64: #define __F16C__ 1 +// CHECK_ZNVER4_M64-NOT: #define __FMA4__ 1 +// CHECK_ZNVER4_M64: #define __FMA__ 1 +// CHECK_ZNVER4_M64: #define __FSGSBASE__ 1 +// CHECK_ZNVER4_M64: #define __GFNI__ 1 +// CHECK_ZNVER4_M64: #define __LZCNT__ 1 +// CHECK_ZNVER4_M64: #define __MMX__ 1 +// CHECK_ZNVER4_M64: #define __PCLMUL__ 1 +// CHECK_ZNVER4_M64: #define __PKU__ 1 +// CHECK_ZNVER4_M64: #define __POPCNT__ 1 +// CHECK_ZNVER4_M64: #define __PRFCHW__ 1 +// CHECK_ZNVER4_M64: #define __RDPID__ 1 +// CHECK_ZNVER4_M64: #define __RDPRU__ 1 +// CHECK_ZNVER4_M64: #define __RDRND__ 1 +// CHECK_ZNVER4_M64: #define __RDSEED__ 1 +// CHECK_ZNVER4_M64: #define __SHA__ 1 +// CHECK_ZNVER4_M64: #define __SSE2_MATH__ 1 +// CHECK_ZNVER4_M64: #define __SSE2__ 1 +// CHECK_ZNVER4_M64: #define __SSE3__ 1 +// CHECK_ZNVER4_M64: #define 
__SSE4A__ 1 +// CHECK_ZNVER4_M64: #define __SSE4_1__ 1 +// CHECK_ZNVER4_M64: #define __SSE4_2__ 1 +// CHECK_ZNVER4_M64: #define __SSE_MATH__ 1 +// CHECK_ZNVER4_M64: #define __SSE__ 1 +// CHECK_ZNVER4_M64: #define __SSSE3__ 1 +// CHECK_ZNVER4_M64-NOT: #define __TBM__ 1 +// CHECK_ZNVER4_M64: #define __VAES__ 1 +// CHECK_ZNVER4_M64: #define __VPCLMULQDQ__ 1 +// CHECK_ZNVER4_M64: #define __WBNOINVD__ 1 +// CHECK_ZNVER4_M64-NOT: #define __XOP__ 1 +// CHECK_ZNVER4_M64: #define __XSAVEC__ 1 +// CHECK_ZNVER4_M64: #define __XSAVEOPT__ 1 +// CHECK_ZNVER4_M64: #define __XSAVES__ 1 +// CHECK_ZNVER4_M64: #define __XSAVE__ 1 +// CHECK_ZNVER4_M64: #define __amd64 1 +// CHECK_ZNVER4_M64: #define __amd64__ 1 +// CHECK_ZNVER4_M64: #define __tune_znver4__ 1 +// CHECK_ZNVER4_M64: #define __x86_64 1 +// CHECK_ZNVER4_M64: #define __x86_64__ 1 +// CHECK_ZNVER4_M64: #define __znver4 1 +// CHECK_ZNVER4_M64: #define __znver4__ 1 + // End X86/GCC/Linux tests ------------------ // Begin PPC/GCC/Linux tests ---------------- Index: clang/test/Misc/target-invalid-cpu-note.c =================================================================== --- clang/test/Misc/target-invalid-cpu-note.c +++ clang/test/Misc/target-invalid-cpu-note.c @@ -13,19 +13,19 @@ // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' -// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, 
sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} +// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 // X86_64: error: unknown target CPU 'not-a-cpu' -// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, 
opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} +// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 // TUNE_X86: error: unknown target CPU 'not-a-cpu' -// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} +// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, 
winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 // TUNE_X86_64: error: unknown target CPU 'not-a-cpu' -// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} +// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, 
winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' Index: clang/test/Frontend/x86-target-cpu.c =================================================================== --- clang/test/Frontend/x86-target-cpu.c +++ clang/test/Frontend/x86-target-cpu.c @@ -37,5 +37,6 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver1 -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver4 -verify %s // // expected-no-diagnostics Index: clang/test/Driver/x86-march.c =================================================================== --- clang/test/Driver/x86-march.c +++ clang/test/Driver/x86-march.c @@ -206,6 +206,10 @@ // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver3 2>&1 \ // RUN: | FileCheck %s -check-prefix=znver3 // znver3: "-target-cpu" "znver3" +// +// RUN: %clang -target x86_64-unknown-unknown -c -### %s 
-march=znver4 2>&1 \ +// RUN: | FileCheck %s -check-prefix=znver4 +// znver4: "-target-cpu" "znver4" // RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64 // x86-64: "-target-cpu" "x86-64" Index: clang/test/CodeGen/target-builtin-noerror.c =================================================================== --- clang/test/CodeGen/target-builtin-noerror.c +++ clang/test/CodeGen/target-builtin-noerror.c @@ -135,4 +135,5 @@ (void)__builtin_cpu_is("znver1"); (void)__builtin_cpu_is("znver2"); (void)__builtin_cpu_is("znver3"); + (void)__builtin_cpu_is("znver4"); } Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -607,6 +607,9 @@ case CK_ZNVER3: defineCPUMacros(Builder, "znver3"); break; + case CK_ZNVER4: + defineCPUMacros(Builder, "znver4"); + break; case CK_Geode: defineCPUMacros(Builder, "geode"); break; @@ -1440,6 +1443,7 @@ case CK_ZNVER1: case CK_ZNVER2: case CK_ZNVER3: + case CK_ZNVER4: // Deprecated case CK_x86_64: case CK_x86_64_v2:
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits