GGanesh updated this revision to Diff 483092.
GGanesh added a comment.

Updated to use the 'no' processor model.
Fixed the following tests:
tools/llvm-mca/X86/cpus.s
tools/llvm-mca/X86/read-after-ld-1.s
tools/llvm-mca/X86/register-file-statistics.s
tools/llvm-mca/X86/scheduler-queue-usage.s

Addressed review comments from @RKSimon.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/cpus-amd.ll
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/slow-unaligned-mem.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s
  llvm/test/tools/llvm-mca/X86/cpus.s
  llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
  llvm/test/tools/llvm-mca/X86/register-file-statistics.s
  llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s

Index: llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
===================================================================
--- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
+++ llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
@@ -112,6 +112,12 @@
 # ZNVER3-NEXT:     [3] Maximum number of used buffer entries.
 # ZNVER3-NEXT:     [4] Total number of buffer entries.
 
+# ZNVER4:          Scheduler's queue usage:
+# ZNVER4-NEXT:     [1] Resource name.
+# ZNVER4-NEXT:     [2] Average number of used buffer entries.
+# ZNVER4-NEXT:     [3] Maximum number of used buffer entries.
+# ZNVER4-NEXT:     [4] Total number of buffer entries.
+
 # BARCELONA:        [1]            [2]        [3]        [4]
 # BARCELONA-NEXT:  SBPortAny        0          1          54
 
@@ -165,3 +171,9 @@
 # ZNVER3-NEXT:     Zn3Int           0          1          96
 # ZNVER3-NEXT:     Zn3Load          0          0          72
 # ZNVER3-NEXT:     Zn3Store         0          0          64
+
+# ZNVER4:           [1]            [2]        [3]        [4]
+# ZNVER4-NEXT:     Zn3FP            0          0          64
+# ZNVER4-NEXT:     Zn3Int           0          1          96
+# ZNVER4-NEXT:     Zn3Load          0          0          72
+# ZNVER4-NEXT:     Zn3Store         0          0          64
Index: llvm/test/tools/llvm-mca/X86/register-file-statistics.s
===================================================================
--- llvm/test/tools/llvm-mca/X86/register-file-statistics.s
+++ llvm/test/tools/llvm-mca/X86/register-file-statistics.s
@@ -54,6 +54,11 @@
 # ZNVER3-NEXT:       Total number of mappings created: 0
 # ZNVER3-NEXT:       Max number of mappings used:      0
 
+# ZNVER4:         *  Register File #1 -- Zn3FpPRF:
+# ZNVER4-NEXT:       Number of physical registers:     160
+# ZNVER4-NEXT:       Total number of mappings created: 0
+# ZNVER4-NEXT:       Max number of mappings used:      0
+
 # BDVER2:         *  Register File #2 -- PdIntegerPRF:
 # BDVER2-NEXT:       Number of physical registers:     96
 # BDVER2-NEXT:       Total number of mappings created: 2
@@ -78,3 +83,8 @@
 # ZNVER3-NEXT:       Number of physical registers:     192
 # ZNVER3-NEXT:       Total number of mappings created: 2
 # ZNVER3-NEXT:       Max number of mappings used:      2
+
+# ZNVER4:         *  Register File #2 -- Zn3IntegerPRF:
+# ZNVER4-NEXT:       Number of physical registers:     192
+# ZNVER4-NEXT:       Total number of mappings created: 2
+# ZNVER4-NEXT:       Max number of mappings used:      2
Index: llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
===================================================================
--- llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
+++ llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
@@ -47,6 +47,9 @@
 # ZNVER3-NEXT:    Total Cycles:      17
 # ZNVER3-NEXT:    Total uOps:        2
 
+# ZNVER4-NEXT:    Total Cycles:      17
+# ZNVER4-NEXT:    Total uOps:        2
+
 # BARCELONA:      Dispatch Width:    4
 # BARCELONA-NEXT: uOps Per Cycle:    0.15
 # BARCELONA-NEXT: IPC:               0.10
@@ -97,6 +100,11 @@
 # ZNVER3-NEXT:    IPC:               0.12
 # ZNVER3-NEXT:    Block RThroughput: 3.0
 
+# ZNVER4:         Dispatch Width:    6
+# ZNVER4-NEXT:    uOps Per Cycle:    0.12
+# ZNVER4-NEXT:    IPC:               0.12
+# ZNVER4-NEXT:    Block RThroughput: 3.0
+
 # ALL:            Timeline view:
 
 # BARCELONA-NEXT:                     0123456789
@@ -129,6 +137,9 @@
 # ZNVER3-NEXT:                        0123456
 # ZNVER3-NEXT:    Index     0123456789
 
+# ZNVER4-NEXT:                        0123456
+# ZNVER4-NEXT:    Index     0123456789
+
 # BARCELONA:      [0,0]     DeeeeeeeeeeeeeeER  .   vdivps	%xmm0, %xmm1, %xmm1
 # BARCELONA-NEXT: [0,1]     D========eeeeeeeeeER   vaddps	(%rax), %xmm1, %xmm1
 
@@ -159,6 +170,9 @@
 # ZNVER3:         [0,0]     DeeeeeeeeeeeER ..   vdivps	%xmm0, %xmm1, %xmm1
 # ZNVER3-NEXT:    [0,1]     D====eeeeeeeeeeER   vaddps	(%rax), %xmm1, %xmm1
 
+# ZNVER4:         [0,0]     DeeeeeeeeeeeER ..   vdivps	%xmm0, %xmm1, %xmm1
+# ZNVER4-NEXT:    [0,1]     D====eeeeeeeeeeER   vaddps	(%rax), %xmm1, %xmm1
+
 # ALL:            Average Wait times (based on the timeline view):
 # ALL-NEXT:       [0]: Executions
 # ALL-NEXT:       [1]: Average time spent waiting in a scheduler's queue
@@ -197,3 +211,6 @@
 
 # ZNVER3-NEXT:    1.     1     5.0    0.0    0.0       vaddps	(%rax), %xmm1, %xmm1
 # ZNVER3-NEXT:           1     3.0    0.5    0.0       <total>
+
+# ZNVER4-NEXT:    1.     1     5.0    0.0    0.0       vaddps	(%rax), %xmm1, %xmm1
+# ZNVER4-NEXT:           1     3.0    0.5    0.0       <total>
Index: llvm/test/tools/llvm-mca/X86/cpus.s
===================================================================
--- llvm/test/tools/llvm-mca/X86/cpus.s
+++ llvm/test/tools/llvm-mca/X86/cpus.s
@@ -99,3 +99,8 @@
 # ZNVER3-NEXT:      uOps Per Cycle:    0.97
 # ZNVER3-NEXT:      IPC:               0.97
 # ZNVER3-NEXT:      Block RThroughput: 0.3
+
+# ZNVER4:           Dispatch Width:    6
+# ZNVER4-NEXT:      uOps Per Cycle:    0.97
+# ZNVER4-NEXT:      IPC:               0.97
+# ZNVER4-NEXT:      Block RThroughput: 0.3
Index: llvm/test/MC/X86/x86_long_nop.s
===================================================================
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -17,6 +17,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
Index: llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
===================================================================
--- llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -15,6 +15,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
 
 ; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
Index: llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
===================================================================
--- llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
+++ llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
@@ -7,6 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver1 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST
 
Index: llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
===================================================================
--- llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake     | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1      | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3      | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4      | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64      | FileCheck %s --check-prefixes=X86-64
 
 define float @f32_no_daz(float %f) #0 {
Index: llvm/test/CodeGen/X86/slow-unaligned-mem.ll
===================================================================
--- llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -49,6 +49,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4        2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 
Index: llvm/test/CodeGen/X86/rdpru.ll
===================================================================
--- llvm/test/CodeGen/X86/rdpru.ll
+++ llvm/test/CodeGen/X86/rdpru.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+rdpru -fast-isel | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 -fast-isel | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 -fast-isel | FileCheck %s --check-prefix=X64
 
 define void @rdpru_asm() {
 ; X86-LABEL: rdpru_asm:
Index: llvm/test/CodeGen/X86/cpus-amd.ll
===================================================================
--- llvm/test/CodeGen/X86/cpus-amd.ll
+++ llvm/test/CodeGen/X86/cpus-amd.ll
@@ -28,6 +28,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: llvm/lib/Target/X86/X86PfmCounters.td
===================================================================
--- llvm/lib/Target/X86/X86PfmCounters.td
+++ llvm/lib/Target/X86/X86PfmCounters.td
@@ -289,3 +289,4 @@
   ];
 }
 def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>;
+def : PfmCountersBinding<"znver4", ZnVer3PfmCounters>;
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -1295,6 +1295,23 @@
     !listconcat(ZN2Tuning, ZN3AdditionalTuning);
   list<SubtargetFeature> ZN3Features =
     !listconcat(ZN2Features, ZN3AdditionalFeatures);
+  list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
+  list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
+                                                  FeatureCDI,
+                                                  FeatureDQI,
+                                                  FeatureBWI,
+                                                  FeatureVLX,
+                                                  FeatureVBMI,
+                                                  FeatureVBMI2,
+                                                  FeatureIFMA,
+                                                  FeatureVNNI,
+                                                  FeatureBITALG,
+                                                  FeatureGFNI,
+                                                  FeatureBF16,
+                                                  FeatureSHSTK,
+                                                  FeatureVPOPCNTDQ];
+  list<SubtargetFeature> ZN4Features =
+    !listconcat(ZN3Features, ZN4AdditionalFeatures);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1599,6 +1616,8 @@
                 ProcessorFeatures.ZN2Tuning>;
 def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
                 ProcessorFeatures.ZN3Tuning>;
+def : Proc<"znver4",ProcessorFeatures.ZN4Features,
+           ProcessorFeatures.ZN4Tuning>;
 
 def : Proc<"geode",           [FeatureX87, FeatureCX8, Feature3DNowA],
                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
Index: llvm/lib/Support/X86TargetParser.cpp
===================================================================
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -298,6 +298,12 @@
 static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
                                                 FeatureINVPCID | FeaturePKU |
                                                 FeatureVAES | FeatureVPCLMULQDQ;
+static constexpr FeatureBitset FeaturesZNVER4 =
+    FeaturesZNVER3 | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ |
+    FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI |
+    FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512BITALG |
+    FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI |
+    FeatureSHSTK;
 
 constexpr ProcInfo Processors[] = {
   // Empty processor. Include X87 and CMPXCHG8 for backwards compatibility.
@@ -425,6 +431,7 @@
   { {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1 },
   { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 },
   { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3 },
+  { {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4 },
   // Generic 64-bit processor.
   { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 },
   { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 },
Index: llvm/lib/Support/Host.cpp
===================================================================
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -1105,7 +1105,15 @@
       *Subtype = X86::AMDFAM19H_ZNVER3;
       break;
     }
-    break;
+    if ((Model >= 0x10 && Model <= 0x1f) ||
+        (Model >= 0x60 && Model <= 0x74) ||
+        (Model >= 0x78 && Model <= 0x7b) ||
+        (Model >= 0xA0 && Model <= 0xAf)) {
+      CPU = "znver4";
+      *Subtype = X86::AMDFAM19H_ZNVER4;
+      break; //  "znver4"
+    }
+    break; // family 19h 
   default:
     break; // Unknown AMD CPU.
   }
Index: llvm/include/llvm/Support/X86TargetParser.h
===================================================================
--- llvm/include/llvm/Support/X86TargetParser.h
+++ llvm/include/llvm/Support/X86TargetParser.h
@@ -129,6 +129,7 @@
   CK_ZNVER1,
   CK_ZNVER2,
   CK_ZNVER3,
+  CK_ZNVER4,
   CK_x86_64,
   CK_x86_64_v2,
   CK_x86_64_v3,
Index: compiler-rt/lib/builtins/cpu_model.c
===================================================================
--- compiler-rt/lib/builtins/cpu_model.c
+++ compiler-rt/lib/builtins/cpu_model.c
@@ -627,6 +627,14 @@
       *Subtype = AMDFAM19H_ZNVER3;
       break;
     }
+    if ((Model >= 0x10 && Model <= 0x1f) ||
+        (Model >= 0x60 && Model <= 0x74) ||
+        (Model >= 0x78 && Model <= 0x7b) ||
+        (Model >= 0xA0 && Model <= 0xAf)) {
+      CPU = "znver4";
+      *Subtype = AMDFAM19H_ZNVER4;
+      break; //  "znver4"
+    }
     break;
   default:
     break; // Unknown AMD CPU.
Index: clang/test/Preprocessor/predefined-arch-macros.c
===================================================================
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -3832,6 +3832,138 @@
 // CHECK_ZNVER3_M64: #define __znver3 1
 // CHECK_ZNVER3_M64: #define __znver3__ 1
 
+// RUN: %clang -march=znver4 -m32 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M32
+// CHECK_ZNVER4_M32-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER4_M32-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER4_M32: #define __ADX__ 1
+// CHECK_ZNVER4_M32: #define __AES__ 1
+// CHECK_ZNVER4_M32: #define __AVX2__ 1
+// CHECK_ZNVER4_M32: #define __AVX512BF16__ 1
+// CHECK_ZNVER4_M32: #define __AVX512BITALG__ 1
+// CHECK_ZNVER4_M32: #define __AVX512BW__ 1
+// CHECK_ZNVER4_M32: #define __AVX512CD__ 1
+// CHECK_ZNVER4_M32: #define __AVX512DQ__ 1
+// CHECK_ZNVER4_M32: #define __AVX512F__ 1
+// CHECK_ZNVER4_M32: #define __AVX512IFMA__ 1
+// CHECK_ZNVER4_M32: #define __AVX512VBMI2__ 1
+// CHECK_ZNVER4_M32: #define __AVX512VBMI__ 1
+// CHECK_ZNVER4_M32: #define __AVX512VL__ 1
+// CHECK_ZNVER4_M32: #define __AVX512VNNI__ 1
+// CHECK_ZNVER4_M32: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_ZNVER4_M32: #define __AVX__ 1
+// CHECK_ZNVER4_M32: #define __BMI2__ 1
+// CHECK_ZNVER4_M32: #define __BMI__ 1
+// CHECK_ZNVER4_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER4_M32: #define __CLWB__ 1
+// CHECK_ZNVER4_M32: #define __CLZERO__ 1
+// CHECK_ZNVER4_M32: #define __F16C__ 1
+// CHECK_ZNVER4_M32-NOT: #define __FMA4__ 1
+// CHECK_ZNVER4_M32: #define __FMA__ 1
+// CHECK_ZNVER4_M32: #define __FSGSBASE__ 1
+// CHECK_ZNVER4_M32: #define __GFNI__ 1
+// CHECK_ZNVER4_M32: #define __LZCNT__ 1
+// CHECK_ZNVER4_M32: #define __MMX__ 1
+// CHECK_ZNVER4_M32: #define __PCLMUL__ 1
+// CHECK_ZNVER4_M32: #define __PKU__ 1
+// CHECK_ZNVER4_M32: #define __POPCNT__ 1
+// CHECK_ZNVER4_M32: #define __PRFCHW__ 1
+// CHECK_ZNVER4_M32: #define __RDPID__ 1
+// CHECK_ZNVER4_M32: #define __RDPRU__ 1
+// CHECK_ZNVER4_M32: #define __RDRND__ 1
+// CHECK_ZNVER4_M32: #define __RDSEED__ 1
+// CHECK_ZNVER4_M32: #define __SHA__ 1
+// CHECK_ZNVER4_M32: #define __SSE2_MATH__ 1
+// CHECK_ZNVER4_M32: #define __SSE2__ 1
+// CHECK_ZNVER4_M32: #define __SSE3__ 1
+// CHECK_ZNVER4_M32: #define __SSE4A__ 1
+// CHECK_ZNVER4_M32: #define __SSE4_1__ 1
+// CHECK_ZNVER4_M32: #define __SSE4_2__ 1
+// CHECK_ZNVER4_M32: #define __SSE_MATH__ 1
+// CHECK_ZNVER4_M32: #define __SSE__ 1
+// CHECK_ZNVER4_M32: #define __SSSE3__ 1
+// CHECK_ZNVER4_M32-NOT: #define __TBM__ 1
+// CHECK_ZNVER4_M32: #define __WBNOINVD__ 1
+// CHECK_ZNVER4_M32-NOT: #define __XOP__ 1
+// CHECK_ZNVER4_M32: #define __XSAVEC__ 1
+// CHECK_ZNVER4_M32: #define __XSAVEOPT__ 1
+// CHECK_ZNVER4_M32: #define __XSAVES__ 1
+// CHECK_ZNVER4_M32: #define __XSAVE__ 1
+// CHECK_ZNVER4_M32: #define __i386 1
+// CHECK_ZNVER4_M32: #define __i386__ 1
+// CHECK_ZNVER4_M32: #define __tune_znver4__ 1
+// CHECK_ZNVER4_M32: #define __znver4 1
+// CHECK_ZNVER4_M32: #define __znver4__ 1
+
+// RUN: %clang -march=znver4 -m64 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M64
+// CHECK_ZNVER4_M64-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER4_M64-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER4_M64: #define __ADX__ 1
+// CHECK_ZNVER4_M64: #define __AES__ 1
+// CHECK_ZNVER4_M64: #define __AVX2__ 1
+// CHECK_ZNVER4_M64: #define __AVX512BF16__ 1
+// CHECK_ZNVER4_M64: #define __AVX512BITALG__ 1
+// CHECK_ZNVER4_M64: #define __AVX512BW__ 1
+// CHECK_ZNVER4_M64: #define __AVX512CD__ 1
+// CHECK_ZNVER4_M64: #define __AVX512DQ__ 1
+// CHECK_ZNVER4_M64: #define __AVX512F__ 1
+// CHECK_ZNVER4_M64: #define __AVX512IFMA__ 1
+// CHECK_ZNVER4_M64: #define __AVX512VBMI2__ 1
+// CHECK_ZNVER4_M64: #define __AVX512VBMI__ 1
+// CHECK_ZNVER4_M64: #define __AVX512VL__ 1
+// CHECK_ZNVER4_M64: #define __AVX512VNNI__ 1
+// CHECK_ZNVER4_M64: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_ZNVER4_M64: #define __AVX__ 1
+// CHECK_ZNVER4_M64: #define __BMI2__ 1
+// CHECK_ZNVER4_M64: #define __BMI__ 1
+// CHECK_ZNVER4_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER4_M64: #define __CLWB__ 1
+// CHECK_ZNVER4_M64: #define __CLZERO__ 1
+// CHECK_ZNVER4_M64: #define __F16C__ 1
+// CHECK_ZNVER4_M64-NOT: #define __FMA4__ 1
+// CHECK_ZNVER4_M64: #define __FMA__ 1
+// CHECK_ZNVER4_M64: #define __FSGSBASE__ 1
+// CHECK_ZNVER4_M64: #define __GFNI__ 1
+// CHECK_ZNVER4_M64: #define __LZCNT__ 1
+// CHECK_ZNVER4_M64: #define __MMX__ 1
+// CHECK_ZNVER4_M64: #define __PCLMUL__ 1
+// CHECK_ZNVER4_M64: #define __PKU__ 1
+// CHECK_ZNVER4_M64: #define __POPCNT__ 1
+// CHECK_ZNVER4_M64: #define __PRFCHW__ 1
+// CHECK_ZNVER4_M64: #define __RDPID__ 1
+// CHECK_ZNVER4_M64: #define __RDPRU__ 1
+// CHECK_ZNVER4_M64: #define __RDRND__ 1
+// CHECK_ZNVER4_M64: #define __RDSEED__ 1
+// CHECK_ZNVER4_M64: #define __SHA__ 1
+// CHECK_ZNVER4_M64: #define __SSE2_MATH__ 1
+// CHECK_ZNVER4_M64: #define __SSE2__ 1
+// CHECK_ZNVER4_M64: #define __SSE3__ 1
+// CHECK_ZNVER4_M64: #define __SSE4A__ 1
+// CHECK_ZNVER4_M64: #define __SSE4_1__ 1
+// CHECK_ZNVER4_M64: #define __SSE4_2__ 1
+// CHECK_ZNVER4_M64: #define __SSE_MATH__ 1
+// CHECK_ZNVER4_M64: #define __SSE__ 1
+// CHECK_ZNVER4_M64: #define __SSSE3__ 1
+// CHECK_ZNVER4_M64-NOT: #define __TBM__ 1
+// CHECK_ZNVER4_M64: #define __VAES__ 1
+// CHECK_ZNVER4_M64: #define __VPCLMULQDQ__ 1
+// CHECK_ZNVER4_M64: #define __WBNOINVD__ 1
+// CHECK_ZNVER4_M64-NOT: #define __XOP__ 1
+// CHECK_ZNVER4_M64: #define __XSAVEC__ 1
+// CHECK_ZNVER4_M64: #define __XSAVEOPT__ 1
+// CHECK_ZNVER4_M64: #define __XSAVES__ 1
+// CHECK_ZNVER4_M64: #define __XSAVE__ 1
+// CHECK_ZNVER4_M64: #define __amd64 1
+// CHECK_ZNVER4_M64: #define __amd64__ 1
+// CHECK_ZNVER4_M64: #define __tune_znver4__ 1
+// CHECK_ZNVER4_M64: #define __x86_64 1
+// CHECK_ZNVER4_M64: #define __x86_64__ 1
+// CHECK_ZNVER4_M64: #define __znver4 1
+// CHECK_ZNVER4_M64: #define __znver4__ 1
+
 // End X86/GCC/Linux tests ------------------
 
 // Begin PPC/GCC/Linux tests ----------------
Index: clang/test/Misc/target-invalid-cpu-note.c
===================================================================
--- clang/test/Misc/target-invalid-cpu-note.c
+++ clang/test/Misc/target-invalid-cpu-note.c
@@ -13,19 +13,19 @@
 
 // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
 // X86: error: unknown target CPU 'not-a-cpu'
-// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
+// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
 
 // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
 // X86_64: error: unknown target CPU 'not-a-cpu'
-// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
+// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
 
 // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
 // TUNE_X86: error: unknown target CPU 'not-a-cpu'
-// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}}
+// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
 
 // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64
 // TUNE_X86_64: error: unknown target CPU 'not-a-cpu'
-// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}}
+// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
 
 // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
 // NVPTX: error: unknown target CPU 'not-a-cpu'
Index: clang/test/Frontend/x86-target-cpu.c
===================================================================
--- clang/test/Frontend/x86-target-cpu.c
+++ clang/test/Frontend/x86-target-cpu.c
@@ -37,5 +37,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver1 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver4 -verify %s
 //
 // expected-no-diagnostics
Index: clang/test/Driver/x86-march.c
===================================================================
--- clang/test/Driver/x86-march.c
+++ clang/test/Driver/x86-march.c
@@ -206,6 +206,10 @@
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver3 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=znver3
 // znver3: "-target-cpu" "znver3"
+//
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver4 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=znver4
+// znver4: "-target-cpu" "znver4"
 
 // RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64
 // x86-64: "-target-cpu" "x86-64"
Index: clang/test/CodeGen/target-builtin-noerror.c
===================================================================
--- clang/test/CodeGen/target-builtin-noerror.c
+++ clang/test/CodeGen/target-builtin-noerror.c
@@ -135,4 +135,5 @@
   (void)__builtin_cpu_is("znver1");
   (void)__builtin_cpu_is("znver2");
   (void)__builtin_cpu_is("znver3");
+  (void)__builtin_cpu_is("znver4");
 }
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -607,6 +607,9 @@
   case CK_ZNVER3:
     defineCPUMacros(Builder, "znver3");
     break;
+  case CK_ZNVER4:
+    defineCPUMacros(Builder, "znver4");
+    break;
   case CK_Geode:
     defineCPUMacros(Builder, "geode");
     break;
@@ -1440,6 +1443,7 @@
     case CK_ZNVER1:
     case CK_ZNVER2:
     case CK_ZNVER3:
+    case CK_ZNVER4:
     // Deprecated
     case CK_x86_64:
     case CK_x86_64_v2:
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to