llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver Author: Xiaomeng Zhang (zhangxiaomeng-hygon) <details> <summary>Changes</summary> This patch adds initial support for Hygon C86-4G-M8 architectures: - Added C86-4G-M8 CPU targets recognition in Clang and LLVM - Added C86-4G-M8 to target parser and host CPU detection - Updated compiler-rt CPU model detection for C86-4G-M8 - Added C86-4G-M8 to various optimizer tests - Added scheduler models and llvm-mca tests for C86-4G-M8 CPU targets --- Patch is 3.58 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/204587.diff 94 Files Affected: - (modified) clang/lib/Basic/Targets/X86.cpp (+4) - (modified) clang/test/CodeGen/target-builtin-noerror.c (+1) - (modified) clang/test/Driver/x86-march.c (+4) - (modified) clang/test/Frontend/x86-target-cpu.c (+1) - (modified) clang/test/Misc/target-invalid-cpu-note/x86.c (+4) - (modified) clang/test/Preprocessor/predefined-arch-macros.c (+124) - (modified) compiler-rt/lib/builtins/cpu_model/x86.c (+6) - (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+1) - (modified) llvm/include/llvm/TargetParser/X86TargetParser.h (+1) - (modified) llvm/lib/Target/X86/X86.td (+9) - (added) llvm/lib/Target/X86/X86ScheduleC864GM8.td (+3733) - (modified) llvm/lib/TargetParser/Host.cpp (+5) - (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+4) - (modified) llvm/test/CodeGen/X86/bypass-slow-division-64.ll (+1) - (modified) llvm/test/CodeGen/X86/cmp16.ll (+1) - (modified) llvm/test/CodeGen/X86/cpus-hygon.ll (+1) - (modified) llvm/test/CodeGen/X86/rdpru.ll (+1) - (modified) llvm/test/CodeGen/X86/slow-unaligned-mem.ll (+1) - (modified) llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll (+1) - (modified) llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll (+1) - (modified) llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll (+1) - (modified) llvm/test/MC/X86/x86_long_nop.s (+2) - (added) 
llvm/test/tools/llvm-mca/X86/C864GM8/independent-load-stores.s (+158) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/partially-overlapping-group-resources.s (+94) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-adx.s (+73) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-aes.s (+89) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx1.s (+2449) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx2.s (+1099) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512.s (+3267) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512bitalg.s (+98) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512bitalgvl.s (+149) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512bw.s (+1657) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512bwvl.s (+2979) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512cd.s (+167) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512cdvl.s (+285) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512dq.s (+1280) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512dqvl.s (+1680) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512gfni.s (+122) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512gfnivl.s (+197) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512ifma.s (+103) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512ifmavl.s (+159) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vaes.s (+75) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vaesvl.s (+103) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vbmi.s (+132) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vbmi2.s (+411) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vbmi2vl.s (+775) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vbmivl.s (+217) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vl.s (+5297) - 
(added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vnni.s (+159) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vnnivl.s (+271) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vp2intersect.s (+53) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vp2intersectvl.s (+73) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vpclmulqdq.s (+54) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vpclmulqdqvl.s (+61) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vpopcntdq.s (+107) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avx512vpopcntdqvl.s (+167) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avxgfni.s (+89) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-avxvnni.s (+103) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-bmi1.s (+138) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-bmi2.s (+159) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-clflushopt.s (+51) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-clwb.s (+51) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-cmov.s (+341) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-cmpxchg.s (+60) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-f16c.s (+75) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-fma.s (+719) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-fsgsbase.s (+75) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-gfni.s (+68) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-lea.s (+455) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-lzcnt.s (+68) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-mmx.s (+411) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-movbe.s (+68) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-mwaitx.s (+54) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-pclmul.s (+54) - (added) 
llvm/test/tools/llvm-mca/X86/C864GM8/resources-popcnt.s (+68) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-prefetchw.s (+54) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-rdrand.s (+57) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-rdseed.s (+57) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sha.s (+96) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-shstk.s (+84) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse1.s (+479) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse2.s (+978) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse3.s (+122) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse41.s (+384) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse42.s (+117) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-sse4a.s (+68) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-ssse3.s (+271) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-vaes.s (+75) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-vpclmulqdq.s (+54) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-x86_32.s (+96) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-x86_64.s (+2897) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-x87.s (+539) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/resources-xsave.s (+67) - (added) llvm/test/tools/llvm-mca/X86/C864GM8/zero-idioms.s (+799) ``````````diff diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 18036ba109db0..2943604f8c8ff 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -733,6 +733,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_C86_4G_M7: defineCPUMacros(Builder, "c86_4g_m7"); break; + case CK_C86_4G_M8: + defineCPUMacros(Builder, "c86_4g_m8"); + break; } // Target properties. 
@@ -1665,6 +1668,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const { case CK_C86_4G_M4: case CK_C86_4G_M6: case CK_C86_4G_M7: + case CK_C86_4G_M8: // Deprecated case CK_x86_64: case CK_x86_64_v2: diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index bb4c65991ab50..1070f232a492a 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -214,4 +214,5 @@ void verifycpustrings(void) { (void)__builtin_cpu_is("c86-4g-m4"); (void)__builtin_cpu_is("c86-4g-m6"); (void)__builtin_cpu_is("c86-4g-m7"); + (void)__builtin_cpu_is("c86-4g-m8"); } diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c index b05e025fca81c..09a371e6a5e9c 100644 --- a/clang/test/Driver/x86-march.c +++ b/clang/test/Driver/x86-march.c @@ -275,6 +275,10 @@ // RUN: | FileCheck %s -check-prefix=c86-4g-m7 // c86-4g-m7: "-target-cpu" "c86-4g-m7" +// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=c86-4g-m8 2>&1 \ +// RUN: | FileCheck %s -check-prefix=c86-4g-m8 +// c86-4g-m8: "-target-cpu" "c86-4g-m8" + // RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64 // x86-64: "-target-cpu" "x86-64" // RUN: %clang -target x86_64 -c -### %s -march=x86-64-v2 2>&1 | FileCheck %s --check-prefix=x86-64-v2 diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c index 281e41c03c4d0..cbb62a9f8465f 100644 --- a/clang/test/Frontend/x86-target-cpu.c +++ b/clang/test/Frontend/x86-target-cpu.c @@ -43,5 +43,6 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu c86-4g-m4 -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu c86-4g-m6 -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu c86-4g-m7 -verify %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu c86-4g-m8 -verify %s // // expected-no-diagnostics diff --git 
a/clang/test/Misc/target-invalid-cpu-note/x86.c b/clang/test/Misc/target-invalid-cpu-note/x86.c index 921c4b7c5cab0..69b2c617ff475 100644 --- a/clang/test/Misc/target-invalid-cpu-note/x86.c +++ b/clang/test/Misc/target-invalid-cpu-note/x86.c @@ -107,6 +107,7 @@ // X86-SAME: {{^}}, c86-4g-m4 // X86-SAME: {{^}}, c86-4g-m6 // X86-SAME: {{^}}, c86-4g-m7 +// X86-SAME: {{^}}, c86-4g-m8 // X86-SAME: {{^}}, x86-64 // X86-SAME: {{^}}, x86-64-v2 // X86-SAME: {{^}}, x86-64-v3 @@ -191,6 +192,7 @@ // X86_64-SAME: {{^}}, c86-4g-m4 // X86_64-SAME: {{^}}, c86-4g-m6 // X86_64-SAME: {{^}}, c86-4g-m7 +// X86_64-SAME: {{^}}, c86-4g-m8 // X86_64-SAME: {{^}}, x86-64 // X86_64-SAME: {{^}}, x86-64-v2 // X86_64-SAME: {{^}}, x86-64-v3 @@ -302,6 +304,7 @@ // TUNE_X86-SAME: {{^}}, c86-4g-m4 // TUNE_X86-SAME: {{^}}, c86-4g-m6 // TUNE_X86-SAME: {{^}}, c86-4g-m7 +// TUNE_X86-SAME: {{^}}, c86-4g-m8 // TUNE_X86-SAME: {{^}}, x86-64 // TUNE_X86-SAME: {{^}}, geode // TUNE_X86-SAME: {{$}} @@ -411,6 +414,7 @@ // TUNE_X86_64-SAME: {{^}}, c86-4g-m4 // TUNE_X86_64-SAME: {{^}}, c86-4g-m6 // TUNE_X86_64-SAME: {{^}}, c86-4g-m7 +// TUNE_X86_64-SAME: {{^}}, c86-4g-m8 // TUNE_X86_64-SAME: {{^}}, x86-64 // TUNE_X86_64-SAME: {{^}}, geode // TUNE_X86_64-SAME: {{$}} diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 96f7f68694adb..319d146138da8 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4585,6 +4585,130 @@ // CHECK_C864GM7_M64: #define __x86_64 1 // CHECK_C864GM7_M64: #define __x86_64__ 1 +// RUN: %clang -march=c86-4g-m8 -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_C864GM8_M32 +// CHECK_C864GM8_M32: #define __ADX__ 1 +// CHECK_C864GM8_M32: #define __AES__ 1 +// CHECK_C864GM8_M32: #define __AVX2__ 1 +// CHECK_C864GM8_M32: #define __AVX512BF16__ 1 +// CHECK_C864GM8_M32: #define 
__AVX512BITALG__ 1 +// CHECK_C864GM8_M32: #define __AVX512BW__ 1 +// CHECK_C864GM8_M32: #define __AVX512CD__ 1 +// CHECK_C864GM8_M32: #define __AVX512DQ__ 1 +// CHECK_C864GM8_M32: #define __AVX512F__ 1 +// CHECK_C864GM8_M32: #define __AVX512IFMA__ 1 +// CHECK_C864GM8_M32: #define __AVX512VBMI2__ 1 +// CHECK_C864GM8_M32: #define __AVX512VBMI__ 1 +// CHECK_C864GM8_M32: #define __AVX512VL__ 1 +// CHECK_C864GM8_M32: #define __AVX512VNNI__ 1 +// CHECK_C864GM8_M32: #define __AVX512VPOPCNTDQ__ 1 +// CHECK_C864GM8_M32: #define __AVX__ 1 +// CHECK_C864GM8_M32: #define __BMI2__ 1 +// CHECK_C864GM8_M32: #define __BMI__ 1 +// CHECK_C864GM8_M32: #define __CLFLUSHOPT__ 1 +// CHECK_C864GM8_M32: #define __CLWB__ 1 +// CHECK_C864GM8_M32: #define __CLZERO__ 1 +// CHECK_C864GM8_M32: #define __CRC32__ 1 +// CHECK_C864GM8_M32: #define __F16C__ 1 +// CHECK_C864GM8_M32: #define __FMA__ 1 +// CHECK_C864GM8_M32: #define __FSGSBASE__ 1 +// CHECK_C864GM8_M32: #define __FXSR__ 1 +// CHECK_C864GM8_M32: #define __GFNI__ 1 +// CHECK_C864GM8_M32: #define __LZCNT__ 1 +// CHECK_C864GM8_M32: #define __MMX__ 1 +// CHECK_C864GM8_M32: #define __MOVBE__ 1 +// CHECK_C864GM8_M32: #define __MWAITX__ 1 +// CHECK_C864GM8_M32: #define __PCLMUL__ 1 +// CHECK_C864GM8_M32: #define __POPCNT__ 1 +// CHECK_C864GM8_M32: #define __PRFCHW__ 1 +// CHECK_C864GM8_M32: #define __RDRND__ 1 +// CHECK_C864GM8_M32: #define __RDSEED__ 1 +// CHECK_C864GM8_M32: #define __SHA__ 1 +// CHECK_C864GM8_M32: #define __SHSTK__ 1 +// CHECK_C864GM8_M32: #define __SSE2_MATH__ 1 +// CHECK_C864GM8_M32: #define __SSE2__ 1 +// CHECK_C864GM8_M32: #define __SSE3__ 1 +// CHECK_C864GM8_M32: #define __SSE4A__ 1 +// CHECK_C864GM8_M32: #define __SSE4_1__ 1 +// CHECK_C864GM8_M32: #define __SSE4_2__ 1 +// CHECK_C864GM8_M32: #define __SSE_MATH__ 1 +// CHECK_C864GM8_M32: #define __SSE__ 1 +// CHECK_C864GM8_M32: #define __SSSE3__ 1 +// CHECK_C864GM8_M32: #define __VAES__ 1 +// CHECK_C864GM8_M32: #define __VPCLMULQDQ__ 1 +// CHECK_C864GM8_M32: #define 
__WBNOINVD__ 1 +// CHECK_C864GM8_M32: #define __XSAVEC__ 1 +// CHECK_C864GM8_M32: #define __XSAVEOPT__ 1 +// CHECK_C864GM8_M32: #define __XSAVES__ 1 +// CHECK_C864GM8_M32: #define __XSAVE__ 1 +// CHECK_C864GM8_M32: #define __c86_4g_m8__ 1 +// CHECK_C864GM8_M32: #define __i386 1 +// CHECK_C864GM8_M32: #define __i386__ 1 +// CHECK_C864GM8_M32: #define __tune_c86_4g_m8__ 1 + +// RUN: %clang -march=c86-4g-m8 -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_C864GM8_M64 +// CHECK_C864GM8_M64: #define __ADX__ 1 +// CHECK_C864GM8_M64: #define __AES__ 1 +// CHECK_C864GM8_M64: #define __AVX2__ 1 +// CHECK_C864GM8_M64: #define __AVX512BF16__ 1 +// CHECK_C864GM8_M64: #define __AVX512BITALG__ 1 +// CHECK_C864GM8_M64: #define __AVX512BW__ 1 +// CHECK_C864GM8_M64: #define __AVX512CD__ 1 +// CHECK_C864GM8_M64: #define __AVX512DQ__ 1 +// CHECK_C864GM8_M64: #define __AVX512F__ 1 +// CHECK_C864GM8_M64: #define __AVX512IFMA__ 1 +// CHECK_C864GM8_M64: #define __AVX512VBMI2__ 1 +// CHECK_C864GM8_M64: #define __AVX512VBMI__ 1 +// CHECK_C864GM8_M64: #define __AVX512VL__ 1 +// CHECK_C864GM8_M64: #define __AVX512VNNI__ 1 +// CHECK_C864GM8_M64: #define __AVX512VPOPCNTDQ__ 1 +// CHECK_C864GM8_M64: #define __AVX__ 1 +// CHECK_C864GM8_M64: #define __BMI2__ 1 +// CHECK_C864GM8_M64: #define __BMI__ 1 +// CHECK_C864GM8_M64: #define __CLFLUSHOPT__ 1 +// CHECK_C864GM8_M64: #define __CLWB__ 1 +// CHECK_C864GM8_M64: #define __CLZERO__ 1 +// CHECK_C864GM8_M64: #define __CRC32__ 1 +// CHECK_C864GM8_M64: #define __F16C__ 1 +// CHECK_C864GM8_M64: #define __FMA__ 1 +// CHECK_C864GM8_M64: #define __FSGSBASE__ 1 +// CHECK_C864GM8_M64: #define __FXSR__ 1 +// CHECK_C864GM8_M64: #define __GFNI__ 1 +// CHECK_C864GM8_M64: #define __LZCNT__ 1 +// CHECK_C864GM8_M64: #define __MMX__ 1 +// CHECK_C864GM8_M64: #define __MOVBE__ 1 +// CHECK_C864GM8_M64: #define __MWAITX__ 1 +// CHECK_C864GM8_M64: #define __PCLMUL__ 1 +// CHECK_C864GM8_M64: 
#define __POPCNT__ 1 +// CHECK_C864GM8_M64: #define __PRFCHW__ 1 +// CHECK_C864GM8_M64: #define __RDRND__ 1 +// CHECK_C864GM8_M64: #define __RDSEED__ 1 +// CHECK_C864GM8_M64: #define __SHA__ 1 +// CHECK_C864GM8_M64: #define __SHSTK__ 1 +// CHECK_C864GM8_M64: #define __SSE2_MATH__ 1 +// CHECK_C864GM8_M64: #define __SSE2__ 1 +// CHECK_C864GM8_M64: #define __SSE3__ 1 +// CHECK_C864GM8_M64: #define __SSE4A__ 1 +// CHECK_C864GM8_M64: #define __SSE4_1__ 1 +// CHECK_C864GM8_M64: #define __SSE4_2__ 1 +// CHECK_C864GM8_M64: #define __SSE_MATH__ 1 +// CHECK_C864GM8_M64: #define __SSE__ 1 +// CHECK_C864GM8_M64: #define __SSSE3__ 1 +// CHECK_C864GM8_M64: #define __VAES__ 1 +// CHECK_C864GM8_M64: #define __VPCLMULQDQ__ 1 +// CHECK_C864GM8_M64: #define __WBNOINVD__ 1 +// CHECK_C864GM8_M64: #define __XSAVEC__ 1 +// CHECK_C864GM8_M64: #define __XSAVEOPT__ 1 +// CHECK_C864GM8_M64: #define __XSAVES__ 1 +// CHECK_C864GM8_M64: #define __XSAVE__ 1 +// CHECK_C864GM8_M64: #define __c86_4g_m8__ 1 +// CHECK_C864GM8_M64: #define __tune_c86_4g_m8__ 1 +// CHECK_C864GM8_M64: #define __x86_64 1 +// CHECK_C864GM8_M64: #define __x86_64__ 1 + // End X86/GCC/Linux tests ------------------ diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index b0e1f0bdcc5c4..b11cb1ba553ad 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -114,6 +114,7 @@ enum ProcessorSubtypes { HYGONFAM18H_C86_4G_M4, HYGONFAM18H_C86_4G_M6, HYGONFAM18H_C86_4G_M7, + HYGONFAM18H_C86_4G_M8, CPU_SUBTYPE_MAX }; @@ -905,6 +906,11 @@ getHygonProcessorTypeAndSubtype(unsigned Family, unsigned Model, Type = HYGONFAM18H; Subtype = HYGONFAM18H_C86_4G_M7; break; // c86-4g-m7 + case 8: + CPU = "c86-4g-m8"; + Type = HYGONFAM18H; + Subtype = HYGONFAM18H_C86_4G_M8; + break; // c86-4g-m8 } break; // Hygon Family 18H default: diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index 
0194941bb70e0..bf1b6c894d959 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -114,6 +114,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_NOVALAKE, "novalake") X86_CPU_SUBTYPE(HYGONFAM18H_C86_4G_M4, "c86-4g-m4") X86_CPU_SUBTYPE(HYGONFAM18H_C86_4G_M6, "c86-4g-m6") X86_CPU_SUBTYPE(HYGONFAM18H_C86_4G_M7, "c86-4g-m7") +X86_CPU_SUBTYPE(HYGONFAM18H_C86_4G_M8, "c86-4g-m8") // Alternate names supported by __builtin_cpu_is and target multiversioning. X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake") diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h index 85b10d500f0a4..351af41079d3e 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.h +++ b/llvm/include/llvm/TargetParser/X86TargetParser.h @@ -150,6 +150,7 @@ enum CPUKind { CK_C86_4G_M4, CK_C86_4G_M6, CK_C86_4G_M7, + CK_C86_4G_M8, CK_x86_64, CK_x86_64_v2, CK_x86_64_v3, diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 7551966cb8e15..808d1c3921b86 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -852,6 +852,7 @@ include "X86SchedLunarlakeP.td" include "X86SchedSapphireRapids.td" include "X86ScheduleC864GM4.td" include "X86ScheduleC864GM7.td" +include "X86ScheduleC864GM8.td" //===----------------------------------------------------------------------===// @@ -1753,6 +1754,12 @@ def ProcessorFeatures { list<SubtargetFeature> C864GM7AdditionalTuning = [TuningBranchFusion]; list<SubtargetFeature> C864GM7Tuning = !listconcat(C864GM4Tuning, C864GM7AdditionalTuning); + + // C86-4G-M8 + list<SubtargetFeature> C864GM8AdditionalFeatures = [FeatureSHSTK]; + list<SubtargetFeature> C864GM8Features = + !listconcat(C864GM7Features, C864GM8AdditionalFeatures); + list<SubtargetFeature> C864GM8Tuning = C864GM7Tuning; } //===----------------------------------------------------------------------===// @@ -2126,6 +2133,8 @@ def : ProcModel<P, C864GM4Model, 
ProcessorFeatures.C864GM4Features, } def : ProcModel<"c86-4g-m7", C864GM7Model, ProcessorFeatures.C864GM7Features, ProcessorFeatures.C864GM7Tuning>; +def : ProcModel<"c86-4g-m8", C864GM8Model, ProcessorFeatures.C864GM8Features, + ProcessorFeatures.C864GM8Tuning>; def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; diff --git a/llvm/lib/Target/X86/X86ScheduleC864GM8.td b/llvm/lib/Target/X86/X86ScheduleC864GM8.td new file mode 100644 index 0000000000000..19fbfe4c3da62 --- /dev/null +++ b/llvm/lib/Target/X86/X86ScheduleC864GM8.td @@ -0,0 +1,3733 @@ +//=- X86ScheduleC864GM8.td - X86 C86-4G-M8 Scheduling --------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for C86-4G-M8 to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def C864GM8Model : SchedMachineModel { + let IssueWidth = 6; + let MicroOpBufferSize = 256; + // The maximum dispatch is 6 ops/cycle, and a mispredict costs 16 cycles from + // the op-cache, so the loop buffer is limited to 6*16 = 96. + let LoopMicroOpBufferSize = 96; + // The common case of branch misprediction penalty is 16 cycles. 
+ let MispredictPenalty = 16; + + let LoadLatency = 4; + int VecLoadLatency = 7; + int StoreLatency = 1; + let HighLatency = 25; + + let PostRAScheduler = 1; + + let CompleteModel = 1; +} + +let SchedModel = C864GM8Model in { + +//===----------------------------------------------------------------------===// +// Integer Execution Unit +//===----------------------------------------------------------------------===// + +// The C864GM8 has 4 ALUs. +def C4GM8ALU0 : ProcResource<1>; +def C4GM8ALU1 : ProcResource<1>; +def C4GM8ALU2 : ProcResource<1>; +def C4GM8ALU3 : ProcResource<1>; + +def C4GM8ALU : ProcResGroup<[C4GM8ALU0, C4GM8ALU1, C4GM8ALU2, C4GM8ALU3]>; +def C4GM8ALU12 : ProcResGroup<[C4GM8ALU1, C4GM8ALU2]>; +def C4GM8ALU03 : ProcResGroup<[C4GM8ALU0, C4GM8ALU3]>; + +// BRU0 on ALU0. +defvar C4GM8BRU0 = C4GM8ALU0; +// BRU1 is a separate branch execution unit. +def C4GM8BRU1 : ProcResource<1>; + +def C4GM8BRU : ProcResGroup<[C4GM8BRU0, C4GM8BRU1]>; + +// The C864GM8 has 3 AGUs. +def C4GM8AGU0 : ProcResource<1>; +def C4GM8AGU1 : ProcResource<1>; +def C4GM8AGU2 : ProcResource<1>; + +def C4GM8AGU : ProcResGroup<[C4GM8AGU0, C4GM8AGU1, C4GM8AGU2]>; + +// 96 Entry (4x24 entries) integer Scheduler. +def C4GM8Int : ProcResGroup<[C4GM8ALU0, C4GM8AGU0, C4GM8BRU0, + C4GM8ALU1, C4GM8AGU1, + C4GM8ALU2, C4GM8AGU2, + C4GM8ALU3, C4GM8BRU1]> { + let BufferSize = 96; +} + +// The integer physical register file consists of 224 registers. +def C4GM8IntegerPRF : RegisterFile<224, + [GR64, CCR], + [1, 1], + [1, 0], + 6, // Max moves that can be eliminated per cycle. + 0>; // Restrict move elimination to zero regs. + +//===----------------------------------------------------------------------===// +// Floating-Point Unit +//===----------------------------------------------------------------------===// + +// The C864GM8 has 4 FPUs. 
+def C4GM8FPU0 : ProcResource<1>; +def C4GM8FPU1 : ProcResource<1>; +def C4GM8FPU2 : ProcResource<1>; +def C4GM8FPU3 : ProcResource<1>; + +def C4GM8FPU013 : ProcResGroup<[C4GM8FPU0, C4GM8FPU1, C4GM8FPU3]>; +def C4GM8FPU01 : ProcResGroup<[C4GM8FPU0, C4GM8FPU1]>; +def C4GM8FPU12 : ProcResGroup<[C4GM8FPU1, C4GM8FPU2]>; +def C4GM8FPU13 : ProcResGroup<[C4GM8FPU1, C4GM8FPU3]>; +def C4GM8FPU23 : ProcResGroup<[C4GM8FPU2, C4GM8FPU3]>; +def C4GM8FPU02 : ProcResGroup<[C4GM8FPU0, C4GM8FPU2]>; +def C4GM8FPU03 : ProcResGroup<[C4GM8FPU0, C4GM8FPU3]>; + +// 48 floating-point Scheduler Queue. +def C4GM8FPU : ProcResGroup<[C4GM8FPU0, C4GM8FPU1, C4GM8FPU2, C4GM8FPU3]> { + let BufferSize = 48; +} + +// 28 floating-point store Scheduler Queue. +def C4GM8FPU45 : ProcResource<2> { + let BufferSize = 28; +} + +// FP store and Int2FP +defvar C4GM8FPLd01 = C4GM8FPU45; + +// 1 FP store per cycle. +let Super = C4GM8FPLd01 in +def C4GM8FPSt : ProcResource<1>; + +// The floating point physical register file consists of 256 bits x 208 registers. +def C4GM8FpuPRF : RegisterFile<224, + [VR64, VR128, VR256, VR512], + [1, 1, 1, 2], + [0, 1, 1, 0], + 6, // Max moves that can be eliminated per cycle. + 0>; // Restrict move elimination to zero regs. + + +//===----------------------------------------------------------------------===// +// Load-Store Unit +//===----------------------------------------------------------------------===// + +// Load/Store Units and Memory Queues +// The C864GM8 has 4 LS Units. +def C4GM8LSU : ProcResource<4>; + +// Three of the four LSUs can handle loads. +let Super = C4GM8LSU in +def C4GM8Load : ProcResource<3> { + // The LDQ is 54. + let BufferSize = 54; +} +def C4GM8LoadQueue : LoadQueue<C4GM8Load>; + +// All four LSUs can handle stores. +let Super = C4GM8LSU in +def C4GM8Store : ProcResource<4> { + // The STQ is 52. 
+ let BufferSize = 52; +} +def C4GM8StoreQueue : StoreQueue<C4GM8Store>; + +def : ReadAdvance<ReadAfterLd, C864GM8Model.LoadLatency>; +def : ReadAdvance<ReadAfterVecLd, C864GM8Model.VecLoadLatency>; +def : ReadAdvance<ReadAfterVecXLd, C864GM8Model.VecLoadLatency>; +def : ReadAdvance<ReadAfterVecYLd, C864GM8Model.VecLoadLatency>; +def : ReadAdvance<ReadInt2Fpu, -1>; + + +//===----------------------------------------------------------------------===// +// Retire Control Unit +//===----------------------------------------------------------------------===// +def C4GM8RCU : RetireControlUnit<C864GM8Model.MicroOpBufferSize, 8>; + +//===----------------------------------------------------------------------===// +// Basic helper classes. +//===----------------------------------------------------------------------===// + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when dispatched by the schedulers. +// This multiclass defines the resource usage for variants with and without +// folded loads. + + +multiclass __C4GM8WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts, + int Lat = 1, list<int> Res = [], int UOps = 1> { + def : WriteRes<SchedRW, ExePorts> { + let Latency = Lat; + let ReleaseAtCycles = Res; + let NumMicroOps = UOps; + } +} + +multiclass __C4GM8WriteResPair<X86FoldableSchedWrite SchedRW, + list<ProcResourceKind> ExePorts, int Lat, + list<int> Res, int UOps, int LoadLat, int LoadUOps, + ProcResourceKind AGU, int LoadRes> { + defm : __C4GM8WriteRes<SchedRW, ExePorts, Lat, Res, UOps>; + + defm : __C4GM8WriteRes<SchedRW.Folded, + !listconcat([AGU, C4GM8Load], ExePorts), + !add(LoadLat, Lat), + !if(!and(!empty(Res), !eq(LoadRes, 1)), + [], + !listconcat([... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/204587 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
