llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-driver

Author: Xiaomeng Zhang (zhangxiaomeng-hygon)

<details>
<summary>Changes</summary>

This patch adds initial support for the Hygon C86-4G-M8 architecture:

- Added C86-4G-M8 CPU target recognition in Clang and LLVM
- Added C86-4G-M8 to the target parser and host CPU detection
- Updated compiler-rt CPU model detection for C86-4G-M8
- Added C86-4G-M8 to various optimizer tests
- Added scheduler models and llvm-mca tests for the C86-4G-M8 CPU target

---

Patch is 3.58 MiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/204587.diff


94 Files Affected:

- (modified) clang/lib/Basic/Targets/X86.cpp (+4) 
- (modified) clang/test/CodeGen/target-builtin-noerror.c (+1) 
- (modified) clang/test/Driver/x86-march.c (+4) 
- (modified) clang/test/Frontend/x86-target-cpu.c (+1) 
- (modified) clang/test/Misc/target-invalid-cpu-note/x86.c (+4) 
- (modified) clang/test/Preprocessor/predefined-arch-macros.c (+124) 
- (modified) compiler-rt/lib/builtins/cpu_model/x86.c (+6) 
- (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+1) 
- (modified) llvm/include/llvm/TargetParser/X86TargetParser.h (+1) 
- (modified) llvm/lib/Target/X86/X86.td (+9) 
- (added) llvm/lib/Target/X86/X86ScheduleC864GM8.td (+3733) 
- (modified) llvm/lib/TargetParser/Host.cpp (+5) 
- (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+4) 
- (modified) llvm/test/CodeGen/X86/bypass-slow-division-64.ll (+1) 
- (modified) llvm/test/CodeGen/X86/cmp16.ll (+1) 
- (modified) llvm/test/CodeGen/X86/cpus-hygon.ll (+1) 
- (modified) llvm/test/CodeGen/X86/rdpru.ll (+1) 
- (modified) llvm/test/CodeGen/X86/slow-unaligned-mem.ll (+1) 
- (modified) llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll (+1) 
- (modified) llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll (+1) 
- (modified) llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll (+1) 
- (modified) llvm/test/MC/X86/x86_long_nop.s (+2) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/independent-load-stores.s (+158) 
- (added) 
llvm/test/tools/llvm-mca/X86/C864GM8/partially-overlapping-group-resources.s 
(+94) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-adx.s (+73) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-aes.s (+89) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx1.s (+2449) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx2.s (+1099) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512.s (+3267) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512bitalg.s (+98) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512bitalgvl.s 
(+149) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512bw.s (+1657) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512bwvl.s (+2979) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512cd.s (+167) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512cdvl.s (+285) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512dq.s (+1280) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512dqvl.s (+1680) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512gfni.s (+122) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512gfnivl.s (+197) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512ifma.s (+103) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512ifmavl.s (+159) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vaes.s (+75) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vaesvl.s (+103) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vbmi.s (+132) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vbmi2.s (+411) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vbmi2vl.s (+775) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vbmivl.s (+217) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vl.s (+5297) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vnni.s (+159) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vnnivl.s (+271) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vp2intersect.s 
(+53) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vp2intersectvl.s 
(+73) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vpclmulqdq.s 
(+54) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vpclmulqdqvl.s 
(+61) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vpopcntdq.s 
(+107) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vpopcntdqvl.s 
(+167) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avxgfni.s (+89) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avxvnni.s (+103) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-bmi1.s (+138) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-bmi2.s (+159) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-clflushopt.s (+51) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-clwb.s (+51) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-cmov.s (+341) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-cmpxchg.s (+60) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-f16c.s (+75) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-fma.s (+719) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-fsgsbase.s (+75) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-gfni.s (+68) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-lea.s (+455) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-lzcnt.s (+68) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-mmx.s (+411) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-movbe.s (+68) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-mwaitx.s (+54) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-pclmul.s (+54) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-popcnt.s (+68) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-prefetchw.s (+54) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-rdrand.s (+57) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-rdseed.s (+57) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sha.s (+96) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-shstk.s (+84) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse1.s (+479) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse2.s (+978) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse3.s (+122) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse41.s (+384) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse42.s (+117) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse4a.s (+68) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-ssse3.s (+271) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-vaes.s (+75) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-vpclmulqdq.s (+54) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-x86_32.s (+96) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-x86_64.s (+2897) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-x87.s (+539) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-xsave.s (+67) 
- (added) llvm/test/tools/llvm-mca/X86/C864GM8/zero-idioms.s (+799) 


``````````diff
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 18036ba109db0..2943604f8c8ff 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -733,6 +733,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions 
&Opts,
   case CK_C86_4G_M7:
     defineCPUMacros(Builder, "c86_4g_m7");
     break;
+  case CK_C86_4G_M8:
+    defineCPUMacros(Builder, "c86_4g_m8");
+    break;
   }
 
   // Target properties.
@@ -1665,6 +1668,7 @@ std::optional<unsigned> 
X86TargetInfo::getCPUCacheLineSize() const {
     case CK_C86_4G_M4:
     case CK_C86_4G_M6:
     case CK_C86_4G_M7:
+    case CK_C86_4G_M8:
     // Deprecated
     case CK_x86_64:
     case CK_x86_64_v2:
diff --git a/clang/test/CodeGen/target-builtin-noerror.c 
b/clang/test/CodeGen/target-builtin-noerror.c
index bb4c65991ab50..1070f232a492a 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -214,4 +214,5 @@ void verifycpustrings(void) {
   (void)__builtin_cpu_is("c86-4g-m4");
   (void)__builtin_cpu_is("c86-4g-m6");
   (void)__builtin_cpu_is("c86-4g-m7");
+  (void)__builtin_cpu_is("c86-4g-m8");
 }
diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c
index b05e025fca81c..09a371e6a5e9c 100644
--- a/clang/test/Driver/x86-march.c
+++ b/clang/test/Driver/x86-march.c
@@ -275,6 +275,10 @@
 // RUN:   | FileCheck %s -check-prefix=c86-4g-m7
 // c86-4g-m7: "-target-cpu" "c86-4g-m7"
 
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=c86-4g-m8 2>&1 
\
+// RUN:   | FileCheck %s -check-prefix=c86-4g-m8
+// c86-4g-m8: "-target-cpu" "c86-4g-m8"
+
 // RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s 
--check-prefix=x86-64
 // x86-64: "-target-cpu" "x86-64"
 // RUN: %clang -target x86_64 -c -### %s -march=x86-64-v2 2>&1 | FileCheck %s 
--check-prefix=x86-64-v2
diff --git a/clang/test/Frontend/x86-target-cpu.c 
b/clang/test/Frontend/x86-target-cpu.c
index 281e41c03c4d0..cbb62a9f8465f 100644
--- a/clang/test/Frontend/x86-target-cpu.c
+++ b/clang/test/Frontend/x86-target-cpu.c
@@ -43,5 +43,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu c86-4g-m4 
-verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu c86-4g-m6 
-verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu c86-4g-m7 
-verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu c86-4g-m8 
-verify %s
 //
 // expected-no-diagnostics
diff --git a/clang/test/Misc/target-invalid-cpu-note/x86.c 
b/clang/test/Misc/target-invalid-cpu-note/x86.c
index 921c4b7c5cab0..69b2c617ff475 100644
--- a/clang/test/Misc/target-invalid-cpu-note/x86.c
+++ b/clang/test/Misc/target-invalid-cpu-note/x86.c
@@ -107,6 +107,7 @@
 // X86-SAME: {{^}}, c86-4g-m4
 // X86-SAME: {{^}}, c86-4g-m6
 // X86-SAME: {{^}}, c86-4g-m7
+// X86-SAME: {{^}}, c86-4g-m8
 // X86-SAME: {{^}}, x86-64
 // X86-SAME: {{^}}, x86-64-v2
 // X86-SAME: {{^}}, x86-64-v3
@@ -191,6 +192,7 @@
 // X86_64-SAME: {{^}}, c86-4g-m4
 // X86_64-SAME: {{^}}, c86-4g-m6
 // X86_64-SAME: {{^}}, c86-4g-m7
+// X86_64-SAME: {{^}}, c86-4g-m8
 // X86_64-SAME: {{^}}, x86-64
 // X86_64-SAME: {{^}}, x86-64-v2
 // X86_64-SAME: {{^}}, x86-64-v3
@@ -302,6 +304,7 @@
 // TUNE_X86-SAME: {{^}}, c86-4g-m4
 // TUNE_X86-SAME: {{^}}, c86-4g-m6
 // TUNE_X86-SAME: {{^}}, c86-4g-m7
+// TUNE_X86-SAME: {{^}}, c86-4g-m8
 // TUNE_X86-SAME: {{^}}, x86-64
 // TUNE_X86-SAME: {{^}}, geode
 // TUNE_X86-SAME: {{$}}
@@ -411,6 +414,7 @@
 // TUNE_X86_64-SAME: {{^}}, c86-4g-m4
 // TUNE_X86_64-SAME: {{^}}, c86-4g-m6
 // TUNE_X86_64-SAME: {{^}}, c86-4g-m7
+// TUNE_X86_64-SAME: {{^}}, c86-4g-m8
 // TUNE_X86_64-SAME: {{^}}, x86-64
 // TUNE_X86_64-SAME: {{^}}, geode
 // TUNE_X86_64-SAME: {{$}}
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c 
b/clang/test/Preprocessor/predefined-arch-macros.c
index 96f7f68694adb..319d146138da8 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -4585,6 +4585,130 @@
 // CHECK_C864GM7_M64: #define __x86_64 1
 // CHECK_C864GM7_M64: #define __x86_64__ 1
 
+// RUN: %clang -march=c86-4g-m8 -m32 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_C864GM8_M32
+// CHECK_C864GM8_M32: #define __ADX__ 1
+// CHECK_C864GM8_M32: #define __AES__ 1
+// CHECK_C864GM8_M32: #define __AVX2__ 1
+// CHECK_C864GM8_M32: #define __AVX512BF16__ 1
+// CHECK_C864GM8_M32: #define __AVX512BITALG__ 1
+// CHECK_C864GM8_M32: #define __AVX512BW__ 1
+// CHECK_C864GM8_M32: #define __AVX512CD__ 1
+// CHECK_C864GM8_M32: #define __AVX512DQ__ 1
+// CHECK_C864GM8_M32: #define __AVX512F__ 1
+// CHECK_C864GM8_M32: #define __AVX512IFMA__ 1
+// CHECK_C864GM8_M32: #define __AVX512VBMI2__ 1
+// CHECK_C864GM8_M32: #define __AVX512VBMI__ 1
+// CHECK_C864GM8_M32: #define __AVX512VL__ 1
+// CHECK_C864GM8_M32: #define __AVX512VNNI__ 1
+// CHECK_C864GM8_M32: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_C864GM8_M32: #define __AVX__ 1
+// CHECK_C864GM8_M32: #define __BMI2__ 1
+// CHECK_C864GM8_M32: #define __BMI__ 1
+// CHECK_C864GM8_M32: #define __CLFLUSHOPT__ 1
+// CHECK_C864GM8_M32: #define __CLWB__ 1
+// CHECK_C864GM8_M32: #define __CLZERO__ 1
+// CHECK_C864GM8_M32: #define __CRC32__ 1
+// CHECK_C864GM8_M32: #define __F16C__ 1
+// CHECK_C864GM8_M32: #define __FMA__ 1
+// CHECK_C864GM8_M32: #define __FSGSBASE__ 1
+// CHECK_C864GM8_M32: #define __FXSR__ 1
+// CHECK_C864GM8_M32: #define __GFNI__ 1
+// CHECK_C864GM8_M32: #define __LZCNT__ 1
+// CHECK_C864GM8_M32: #define __MMX__ 1
+// CHECK_C864GM8_M32: #define __MOVBE__ 1
+// CHECK_C864GM8_M32: #define __MWAITX__ 1
+// CHECK_C864GM8_M32: #define __PCLMUL__ 1
+// CHECK_C864GM8_M32: #define __POPCNT__ 1
+// CHECK_C864GM8_M32: #define __PRFCHW__ 1
+// CHECK_C864GM8_M32: #define __RDRND__ 1
+// CHECK_C864GM8_M32: #define __RDSEED__ 1
+// CHECK_C864GM8_M32: #define __SHA__ 1
+// CHECK_C864GM8_M32: #define __SHSTK__ 1
+// CHECK_C864GM8_M32: #define __SSE2_MATH__ 1
+// CHECK_C864GM8_M32: #define __SSE2__ 1
+// CHECK_C864GM8_M32: #define __SSE3__ 1
+// CHECK_C864GM8_M32: #define __SSE4A__ 1
+// CHECK_C864GM8_M32: #define __SSE4_1__ 1
+// CHECK_C864GM8_M32: #define __SSE4_2__ 1
+// CHECK_C864GM8_M32: #define __SSE_MATH__ 1
+// CHECK_C864GM8_M32: #define __SSE__ 1
+// CHECK_C864GM8_M32: #define __SSSE3__ 1
+// CHECK_C864GM8_M32: #define __VAES__ 1
+// CHECK_C864GM8_M32: #define __VPCLMULQDQ__ 1
+// CHECK_C864GM8_M32: #define __WBNOINVD__ 1
+// CHECK_C864GM8_M32: #define __XSAVEC__ 1
+// CHECK_C864GM8_M32: #define __XSAVEOPT__ 1
+// CHECK_C864GM8_M32: #define __XSAVES__ 1
+// CHECK_C864GM8_M32: #define __XSAVE__ 1
+// CHECK_C864GM8_M32: #define __c86_4g_m8__ 1
+// CHECK_C864GM8_M32: #define __i386 1
+// CHECK_C864GM8_M32: #define __i386__ 1
+// CHECK_C864GM8_M32: #define __tune_c86_4g_m8__ 1
+
+// RUN: %clang -march=c86-4g-m8 -m64 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_C864GM8_M64
+// CHECK_C864GM8_M64: #define __ADX__ 1
+// CHECK_C864GM8_M64: #define __AES__ 1
+// CHECK_C864GM8_M64: #define __AVX2__ 1
+// CHECK_C864GM8_M64: #define __AVX512BF16__ 1
+// CHECK_C864GM8_M64: #define __AVX512BITALG__ 1
+// CHECK_C864GM8_M64: #define __AVX512BW__ 1
+// CHECK_C864GM8_M64: #define __AVX512CD__ 1
+// CHECK_C864GM8_M64: #define __AVX512DQ__ 1
+// CHECK_C864GM8_M64: #define __AVX512F__ 1
+// CHECK_C864GM8_M64: #define __AVX512IFMA__ 1
+// CHECK_C864GM8_M64: #define __AVX512VBMI2__ 1
+// CHECK_C864GM8_M64: #define __AVX512VBMI__ 1
+// CHECK_C864GM8_M64: #define __AVX512VL__ 1
+// CHECK_C864GM8_M64: #define __AVX512VNNI__ 1
+// CHECK_C864GM8_M64: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_C864GM8_M64: #define __AVX__ 1
+// CHECK_C864GM8_M64: #define __BMI2__ 1
+// CHECK_C864GM8_M64: #define __BMI__ 1
+// CHECK_C864GM8_M64: #define __CLFLUSHOPT__ 1
+// CHECK_C864GM8_M64: #define __CLWB__ 1
+// CHECK_C864GM8_M64: #define __CLZERO__ 1
+// CHECK_C864GM8_M64: #define __CRC32__ 1
+// CHECK_C864GM8_M64: #define __F16C__ 1
+// CHECK_C864GM8_M64: #define __FMA__ 1
+// CHECK_C864GM8_M64: #define __FSGSBASE__ 1
+// CHECK_C864GM8_M64: #define __FXSR__ 1
+// CHECK_C864GM8_M64: #define __GFNI__ 1
+// CHECK_C864GM8_M64: #define __LZCNT__ 1
+// CHECK_C864GM8_M64: #define __MMX__ 1
+// CHECK_C864GM8_M64: #define __MOVBE__ 1
+// CHECK_C864GM8_M64: #define __MWAITX__ 1
+// CHECK_C864GM8_M64: #define __PCLMUL__ 1
+// CHECK_C864GM8_M64: #define __POPCNT__ 1
+// CHECK_C864GM8_M64: #define __PRFCHW__ 1
+// CHECK_C864GM8_M64: #define __RDRND__ 1
+// CHECK_C864GM8_M64: #define __RDSEED__ 1
+// CHECK_C864GM8_M64: #define __SHA__ 1
+// CHECK_C864GM8_M64: #define __SHSTK__ 1
+// CHECK_C864GM8_M64: #define __SSE2_MATH__ 1
+// CHECK_C864GM8_M64: #define __SSE2__ 1
+// CHECK_C864GM8_M64: #define __SSE3__ 1
+// CHECK_C864GM8_M64: #define __SSE4A__ 1
+// CHECK_C864GM8_M64: #define __SSE4_1__ 1
+// CHECK_C864GM8_M64: #define __SSE4_2__ 1
+// CHECK_C864GM8_M64: #define __SSE_MATH__ 1
+// CHECK_C864GM8_M64: #define __SSE__ 1
+// CHECK_C864GM8_M64: #define __SSSE3__ 1
+// CHECK_C864GM8_M64: #define __VAES__ 1
+// CHECK_C864GM8_M64: #define __VPCLMULQDQ__ 1
+// CHECK_C864GM8_M64: #define __WBNOINVD__ 1
+// CHECK_C864GM8_M64: #define __XSAVEC__ 1
+// CHECK_C864GM8_M64: #define __XSAVEOPT__ 1
+// CHECK_C864GM8_M64: #define __XSAVES__ 1
+// CHECK_C864GM8_M64: #define __XSAVE__ 1
+// CHECK_C864GM8_M64: #define __c86_4g_m8__ 1
+// CHECK_C864GM8_M64: #define __tune_c86_4g_m8__ 1
+// CHECK_C864GM8_M64: #define __x86_64 1
+// CHECK_C864GM8_M64: #define __x86_64__ 1
+
 
 // End X86/GCC/Linux tests ------------------
 
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c 
b/compiler-rt/lib/builtins/cpu_model/x86.c
index b0e1f0bdcc5c4..b11cb1ba553ad 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -114,6 +114,7 @@ enum ProcessorSubtypes {
   HYGONFAM18H_C86_4G_M4,
   HYGONFAM18H_C86_4G_M6,
   HYGONFAM18H_C86_4G_M7,
+  HYGONFAM18H_C86_4G_M8,
   CPU_SUBTYPE_MAX
 };
 
@@ -905,6 +906,11 @@ getHygonProcessorTypeAndSubtype(unsigned Family, unsigned 
Model,
       Type = HYGONFAM18H;
       Subtype = HYGONFAM18H_C86_4G_M7;
       break; // c86-4g-m7
+    case 8:
+      CPU = "c86-4g-m8";
+      Type = HYGONFAM18H;
+      Subtype = HYGONFAM18H_C86_4G_M8;
+      break; // c86-4g-m8
     }
     break; // Hygon Family 18H
   default:
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def 
b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 0194941bb70e0..bf1b6c894d959 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -114,6 +114,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_NOVALAKE,       "novalake")
 X86_CPU_SUBTYPE(HYGONFAM18H_C86_4G_M4,       "c86-4g-m4")
 X86_CPU_SUBTYPE(HYGONFAM18H_C86_4G_M6,       "c86-4g-m6")
 X86_CPU_SUBTYPE(HYGONFAM18H_C86_4G_M7,       "c86-4g-m7")
+X86_CPU_SUBTYPE(HYGONFAM18H_C86_4G_M8,       "c86-4g-m8")
 
 // Alternate names supported by __builtin_cpu_is and target multiversioning.
 X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h 
b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 85b10d500f0a4..351af41079d3e 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -150,6 +150,7 @@ enum CPUKind {
   CK_C86_4G_M4,
   CK_C86_4G_M6,
   CK_C86_4G_M7,
+  CK_C86_4G_M8,
   CK_x86_64,
   CK_x86_64_v2,
   CK_x86_64_v3,
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 7551966cb8e15..808d1c3921b86 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -852,6 +852,7 @@ include "X86SchedLunarlakeP.td"
 include "X86SchedSapphireRapids.td"
 include "X86ScheduleC864GM4.td"
 include "X86ScheduleC864GM7.td"
+include "X86ScheduleC864GM8.td"
 
 
 
//===----------------------------------------------------------------------===//
@@ -1753,6 +1754,12 @@ def ProcessorFeatures {
   list<SubtargetFeature> C864GM7AdditionalTuning = [TuningBranchFusion];
   list<SubtargetFeature> C864GM7Tuning =
     !listconcat(C864GM4Tuning, C864GM7AdditionalTuning);
+
+  // C86-4G-M8
+  list<SubtargetFeature> C864GM8AdditionalFeatures = [FeatureSHSTK];
+  list<SubtargetFeature> C864GM8Features =
+    !listconcat(C864GM7Features, C864GM8AdditionalFeatures);
+  list<SubtargetFeature> C864GM8Tuning = C864GM7Tuning;
 }
 
 
//===----------------------------------------------------------------------===//
@@ -2126,6 +2133,8 @@ def : ProcModel<P, C864GM4Model, 
ProcessorFeatures.C864GM4Features,
 }
 def : ProcModel<"c86-4g-m7", C864GM7Model, ProcessorFeatures.C864GM7Features,
                 ProcessorFeatures.C864GM7Tuning>;
+def : ProcModel<"c86-4g-m8", C864GM8Model, ProcessorFeatures.C864GM8Features,
+                ProcessorFeatures.C864GM8Tuning>;
 
 def : Proc<"geode",           [FeatureX87, FeatureCX8, FeatureMMX, 
FeaturePRFCHW],
                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleC864GM8.td 
b/llvm/lib/Target/X86/X86ScheduleC864GM8.td
new file mode 100644
index 0000000000000..19fbfe4c3da62
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ScheduleC864GM8.td
@@ -0,0 +1,3733 @@
+//=- X86ScheduleC864GM8.td - X86 C86-4G-M8 Scheduling --------*- tablegen 
-*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for C86-4G-M8 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def C864GM8Model : SchedMachineModel {
+  let IssueWidth = 6;
+  let MicroOpBufferSize = 256;
+  // The maximum dispatch is 6 ops/cycle, and the mispredict cost is 16 cycles 
from
+  // the op-cache, so the loop buffer is limited to 6*16 = 96.
+  let LoopMicroOpBufferSize = 96;
+  // The common case of branch misprediction penalty is 16 cycles.
+  let MispredictPenalty = 16;
+
+  let LoadLatency = 4;
+  int VecLoadLatency = 7;
+  int StoreLatency = 1;
+  let HighLatency = 25;
+
+  let PostRAScheduler = 1;
+
+  let CompleteModel = 1;
+}
+
+let SchedModel = C864GM8Model in {
+
+//===----------------------------------------------------------------------===//
+// Integer Execution Unit
+//===----------------------------------------------------------------------===//
+
+// The C864GM8 has 4 ALUs.
+def C4GM8ALU0 : ProcResource<1>;
+def C4GM8ALU1 : ProcResource<1>;
+def C4GM8ALU2 : ProcResource<1>;
+def C4GM8ALU3 : ProcResource<1>;
+
+def C4GM8ALU : ProcResGroup<[C4GM8ALU0, C4GM8ALU1, C4GM8ALU2, C4GM8ALU3]>;
+def C4GM8ALU12 : ProcResGroup<[C4GM8ALU1, C4GM8ALU2]>;
+def C4GM8ALU03 : ProcResGroup<[C4GM8ALU0, C4GM8ALU3]>;
+
+// BRU0 on ALU0.
+defvar C4GM8BRU0 = C4GM8ALU0;
+// BRU1 is a separate branch execution unit.
+def C4GM8BRU1 : ProcResource<1>;
+
+def C4GM8BRU : ProcResGroup<[C4GM8BRU0, C4GM8BRU1]>;
+
+// The C864GM8 has 3 AGUs.
+def C4GM8AGU0 : ProcResource<1>;
+def C4GM8AGU1 : ProcResource<1>;
+def C4GM8AGU2 : ProcResource<1>;
+
+def C4GM8AGU : ProcResGroup<[C4GM8AGU0, C4GM8AGU1, C4GM8AGU2]>;
+
+// 96 Entry (4x24 entries) integer Scheduler.
+def C4GM8Int : ProcResGroup<[C4GM8ALU0, C4GM8AGU0, C4GM8BRU0,
+                             C4GM8ALU1, C4GM8AGU1,
+                             C4GM8ALU2, C4GM8AGU2,
+                             C4GM8ALU3,            C4GM8BRU1]> {
+  let BufferSize = 96;
+}
+
+// The integer physical register file consists of 224 registers.
+def C4GM8IntegerPRF : RegisterFile<224,
+                                   [GR64, CCR],
+                                   [1, 1],
+                                   [1, 0],
+                                   6,  // Max moves that can be eliminated per 
cycle.
+                                   0>; // Restrict move elimination to zero 
regs.
+
+//===----------------------------------------------------------------------===//
+// Floating-Point Unit
+//===----------------------------------------------------------------------===//
+
+// The C864GM8 has 4 FPUs.
+def C4GM8FPU0 : ProcResource<1>;
+def C4GM8FPU1 : ProcResource<1>;
+def C4GM8FPU2 : ProcResource<1>;
+def C4GM8FPU3 : ProcResource<1>;
+
+def C4GM8FPU013 : ProcResGroup<[C4GM8FPU0, C4GM8FPU1, C4GM8FPU3]>;
+def C4GM8FPU01  : ProcResGroup<[C4GM8FPU0, C4GM8FPU1]>;
+def C4GM8FPU12  : ProcResGroup<[C4GM8FPU1, C4GM8FPU2]>;
+def C4GM8FPU13  : ProcResGroup<[C4GM8FPU1, C4GM8FPU3]>;
+def C4GM8FPU23  : ProcResGroup<[C4GM8FPU2, C4GM8FPU3]>;
+def C4GM8FPU02  : ProcResGroup<[C4GM8FPU0, C4GM8FPU2]>;
+def C4GM8FPU03  : ProcResGroup<[C4GM8FPU0, C4GM8FPU3]>;
+
+// 48-entry floating-point scheduler queue.
+def C4GM8FPU : ProcResGroup<[C4GM8FPU0, C4GM8FPU1, C4GM8FPU2, C4GM8FPU3]> {
+  let BufferSize = 48;
+}
+
+// 28-entry floating-point store scheduler queue.
+def C4GM8FPU45 : ProcResource<2> {
+  let BufferSize = 28;
+}
+
+// FP store and Int2FP
+defvar C4GM8FPLd01 = C4GM8FPU45;
+
+// 1 FP store per cycle. 
+let Super = C4GM8FPLd01 in
+def C4GM8FPSt : ProcResource<1>;
+
+// The floating point physical register file consists of 256 bits x 208 
registers. NOTE(review): the RegisterFile below is sized 224, not 208 - confirm.
+def C4GM8FpuPRF : RegisterFile<224,
+                               [VR64, VR128, VR256, VR512],
+                               [1, 1, 1, 2],
+                               [0, 1, 1, 0],
+                               6,  // Max moves that can be eliminated per 
cycle.
+                               0>; // Restrict move elimination to zero regs.
+
+
+//===----------------------------------------------------------------------===//
+// Load-Store Unit
+//===----------------------------------------------------------------------===//
+
+// Load/Store Units and Memory Queues
+// The C864GM8 has 4 LS Units.
+def C4GM8LSU : ProcResource<4>;
+
+// Three of the four LS units can handle loads.
+let Super = C4GM8LSU in
+def C4GM8Load : ProcResource<3> {
+  // The LDQ is 54.
+  let BufferSize = 54;
+}
+def C4GM8LoadQueue : LoadQueue<C4GM8Load>;
+
+// All four LS units can handle stores.
+let Super = C4GM8LSU in
+def C4GM8Store : ProcResource<4> {
+  // The STQ is 52.
+  let BufferSize = 52;
+}
+def C4GM8StoreQueue : StoreQueue<C4GM8Store>;
+
+def : ReadAdvance<ReadAfterLd, C864GM8Model.LoadLatency>;
+def : ReadAdvance<ReadAfterVecLd, C864GM8Model.VecLoadLatency>;
+def : ReadAdvance<ReadAfterVecXLd, C864GM8Model.VecLoadLatency>;
+def : ReadAdvance<ReadAfterVecYLd, C864GM8Model.VecLoadLatency>;
+def : ReadAdvance<ReadInt2Fpu, -1>;
+
+
+//===----------------------------------------------------------------------===//
+// Retire Control Unit
+//===----------------------------------------------------------------------===//
+def C4GM8RCU : RetireControlUnit<C864GM8Model.MicroOpBufferSize, 8>;
+
+//===----------------------------------------------------------------------===//
+// Basic helper classes.
+//===----------------------------------------------------------------------===//
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when dispatched by the schedulers.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+
+
+multiclass __C4GM8WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
+                         int Lat = 1, list<int> Res = [], int UOps = 1> {
+  def : WriteRes<SchedRW, ExePorts> {
+    let Latency = Lat;
+    let ReleaseAtCycles = Res;
+    let NumMicroOps = UOps;
+  }
+}
+
+multiclass __C4GM8WriteResPair<X86FoldableSchedWrite SchedRW,
+                             list<ProcResourceKind> ExePorts, int Lat,
+                             list<int> Res, int UOps, int LoadLat, int 
LoadUOps,
+                             ProcResourceKind AGU, int LoadRes> {
+  defm : __C4GM8WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+
+  defm : __C4GM8WriteRes<SchedRW.Folded,
+                       !listconcat([AGU, C4GM8Load], ExePorts),
+                       !add(LoadLat, Lat),
+                       !if(!and(!empty(Res), !eq(LoadRes, 1)),
+                         [],
+                         !listconcat([...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/204587
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to