GGanesh created this revision.
GGanesh added reviewers: RKSimon, craig.topper.
GGanesh created this object with visibility "All Users".
GGanesh added a project: clang.
Herald added a subscriber: cfe-commits.

This patch enables the following:

1. AMD family 17h "znver2" tune flag (-march, -mcpu).
2. The ISA extensions that are enabled for the "znver2" architecture.
3. For the time being, it uses the znver1 scheduler model.
4. Tests are updated.


Repository:
  rC Clang

https://reviews.llvm.org/D58344

Files:
  include/clang/Basic/X86Target.def
  lib/Basic/Targets/X86.cpp
  test/CodeGen/attr-target-mv.c
  test/CodeGen/target-builtin-noerror.c
  test/Driver/x86-march.c
  test/Frontend/x86-target-cpu.c
  test/Misc/target-invalid-cpu-note.c
  test/Preprocessor/predefined-arch-macros.c

Index: test/Preprocessor/predefined-arch-macros.c
===================================================================
--- test/Preprocessor/predefined-arch-macros.c
+++ test/Preprocessor/predefined-arch-macros.c
@@ -2676,8 +2676,102 @@
 // CHECK_ZNVER1_M64: #define __znver1 1
 // CHECK_ZNVER1_M64: #define __znver1__ 1

+// RUN: %clang -march=znver2 -m32 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M32
+// CHECK_ZNVER2_M32-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER2_M32-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER2_M32: #define __ADX__ 1
+// CHECK_ZNVER2_M32: #define __AES__ 1
+// CHECK_ZNVER2_M32: #define __AVX2__ 1
+// CHECK_ZNVER2_M32: #define __AVX__ 1
+// CHECK_ZNVER2_M32: #define __BMI2__ 1
+// CHECK_ZNVER2_M32: #define __BMI__ 1
+// CHECK_ZNVER2_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER2_M32: #define __CLWB__ 1
+// CHECK_ZNVER2_M32: #define __CLZERO__ 1
+// CHECK_ZNVER2_M32: #define __F16C__ 1
+// CHECK_ZNVER2_M32: #define __FMA__ 1
+// CHECK_ZNVER2_M32: #define __FSGSBASE__ 1
+// CHECK_ZNVER2_M32: #define __LZCNT__ 1
+// CHECK_ZNVER2_M32: #define __MMX__ 1
+// CHECK_ZNVER2_M32: #define __PCLMUL__ 1
+// CHECK_ZNVER2_M32: #define __POPCNT__ 1
+// CHECK_ZNVER2_M32: #define __PRFCHW__ 1
+// CHECK_ZNVER2_M32: #define __RDPID__ 1
+// CHECK_ZNVER2_M32: #define __RDRND__ 1
+// CHECK_ZNVER2_M32: #define __RDSEED__ 1
+// CHECK_ZNVER2_M32: #define __SHA__ 1
+// CHECK_ZNVER2_M32: #define __SSE2_MATH__ 1
+// CHECK_ZNVER2_M32: #define __SSE2__ 1
+// CHECK_ZNVER2_M32: #define __SSE3__ 1
+// CHECK_ZNVER2_M32: #define __SSE4A__ 1
+// CHECK_ZNVER2_M32: #define __SSE4_1__ 1
+// CHECK_ZNVER2_M32: #define __SSE4_2__ 1
+// CHECK_ZNVER2_M32: #define __SSE_MATH__ 1
+// CHECK_ZNVER2_M32: #define __SSE__ 1
+// CHECK_ZNVER2_M32: #define __SSSE3__ 1
+// CHECK_ZNVER2_M32: #define __WBNOINVD__ 1
+// CHECK_ZNVER2_M32: #define __XSAVEC__ 1
+// CHECK_ZNVER2_M32: #define __XSAVEOPT__ 1
+// CHECK_ZNVER2_M32: #define __XSAVES__ 1
+// CHECK_ZNVER2_M32: #define __XSAVE__ 1
+// CHECK_ZNVER2_M32: #define __i386 1
+// CHECK_ZNVER2_M32: #define __i386__ 1
+// CHECK_ZNVER2_M32: #define __tune_znver2__ 1
+// CHECK_ZNVER2_M32: #define __znver2 1
+// CHECK_ZNVER2_M32: #define __znver2__ 1
+
+// RUN: %clang -march=znver2 -m64 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M64
+// CHECK_ZNVER2_M64-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER2_M64-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER2_M64: #define __ADX__ 1
+// CHECK_ZNVER2_M64: #define __AES__ 1
+// CHECK_ZNVER2_M64: #define __AVX2__ 1
+// CHECK_ZNVER2_M64: #define __AVX__ 1
+// CHECK_ZNVER2_M64: #define __BMI2__ 1
+// CHECK_ZNVER2_M64: #define __BMI__ 1
+// CHECK_ZNVER2_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER2_M64: #define __CLWB__ 1
+// CHECK_ZNVER2_M64: #define __CLZERO__ 1
+// CHECK_ZNVER2_M64: #define __F16C__ 1
+// CHECK_ZNVER2_M64: #define __FMA__ 1
+// CHECK_ZNVER2_M64: #define __FSGSBASE__ 1
+// CHECK_ZNVER2_M64: #define __LZCNT__ 1
+// CHECK_ZNVER2_M64: #define __MMX__ 1
+// CHECK_ZNVER2_M64: #define __PCLMUL__ 1
+// CHECK_ZNVER2_M64: #define __POPCNT__ 1
+// CHECK_ZNVER2_M64: #define __PRFCHW__ 1
+// CHECK_ZNVER2_M64: #define __RDPID__ 1
+// CHECK_ZNVER2_M64: #define __RDRND__ 1
+// CHECK_ZNVER2_M64: #define __RDSEED__ 1
+// CHECK_ZNVER2_M64: #define __SHA__ 1
+// CHECK_ZNVER2_M64: #define __SSE2_MATH__ 1
+// CHECK_ZNVER2_M64: #define __SSE2__ 1
+// CHECK_ZNVER2_M64: #define __SSE3__ 1
+// CHECK_ZNVER2_M64: #define __SSE4A__ 1
+// CHECK_ZNVER2_M64: #define __SSE4_1__ 1
+// CHECK_ZNVER2_M64: #define __SSE4_2__ 1
+// CHECK_ZNVER2_M64: #define __SSE_MATH__ 1
+// CHECK_ZNVER2_M64: #define __SSE__ 1
+// CHECK_ZNVER2_M64: #define __SSSE3__ 1
+// CHECK_ZNVER2_M64: #define __WBNOINVD__ 1
+// CHECK_ZNVER2_M64: #define __XSAVEC__ 1
+// CHECK_ZNVER2_M64: #define __XSAVEOPT__ 1
+// CHECK_ZNVER2_M64: #define __XSAVES__ 1
+// CHECK_ZNVER2_M64: #define __XSAVE__ 1
+// CHECK_ZNVER2_M64: #define __amd64 1
+// CHECK_ZNVER2_M64: #define __amd64__ 1
+// CHECK_ZNVER2_M64: #define __tune_znver2__ 1
+// CHECK_ZNVER2_M64: #define __x86_64 1
+// CHECK_ZNVER2_M64: #define __x86_64__ 1
+// CHECK_ZNVER2_M64: #define __znver2 1
+// CHECK_ZNVER2_M64: #define __znver2__ 1
+
 // End X86/GCC/Linux tests ------------------

 // Begin PPC/GCC/Linux tests ----------------
 // Check that VSX also turns on altivec.
 // RUN: %clang -mvsx -E -dM %s -o - 2>&1 \
Index: test/Misc/target-invalid-cpu-note.c
===================================================================
--- test/Misc/target-invalid-cpu-note.c
+++ test/Misc/target-invalid-cpu-note.c
@@ -19,9 +19,9 @@
 // X86-SAME: skx, cascadelake, cannonlake, icelake-client, icelake-server, knl, knm, lakemont, k6, k6-2, k6-3,
 // X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
 // X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
-// X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1,
+// X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
 // X86-SAME: x86-64, geode

 // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
 // X86_64: error: unknown target CPU 'not-a-cpu'
 // X86_64: note: valid target CPU values are: nocona, core2, penryn, bonnell,
@@ -30,38 +30,38 @@
 // X86_64-SAME: core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cannonlake,
 // X86_64-SAME: icelake-client, icelake-server, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3,
 // X86_64-SAME: athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1,
-// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, x86-64
+// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, x86-64

 // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
 // NVPTX: error: unknown target CPU 'not-a-cpu'
 // NVPTX: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35,
 // NVPTX-SAME: sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72

 // RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
 // R600: error: unknown target CPU 'not-a-cpu'
-// R600: note: valid target CPU values are: r600, rv630, rv635, r630, rs780,
-// R600-SAME: rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar,
-// R600-SAME: palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts,
+// R600: note: valid target CPU values are: r600, rv630, rv635, r630, rs780,
+// R600-SAME: rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar,
+// R600-SAME: palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts,
 // R600-SAME: caicos, aruba, cayman, turks


 // RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
 // AMDGCN: error: unknown target CPU 'not-a-cpu'
 // AMDGCN: note: valid target CPU values are: gfx600, tahiti, gfx601, hainan,
 // AMDGCN-SAME: oland, pitcairn, verde, gfx700, kaveri, gfx701, hawaii, gfx702,
-// AMDGCN-SAME: gfx703, kabini, mullins, gfx704, bonaire, gfx801, carrizo,
+// AMDGCN-SAME: gfx703, kabini, mullins, gfx704, bonaire, gfx801, carrizo,
 // AMDGCN-SAME: gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11,
 // AMDGCN-SAME: gfx810, stoney, gfx900, gfx902

 // RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
 // WEBASM: error: unknown target CPU 'not-a-cpu'
 // WEBASM: note: valid target CPU values are: mvp, bleeding-edge, generic

 // RUN: not %clang_cc1 -triple systemz--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SYSTEMZ
 // SYSTEMZ: error: unknown target CPU 'not-a-cpu'
 // SYSTEMZ: note: valid target CPU values are: arch8, z10, arch9, z196, arch10,
 // SYSTEMZ-SAME: zEC12, arch11, z13, arch12, z14

 // RUN: not %clang_cc1 -triple sparc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SPARC
 // SPARC: error: unknown target CPU 'not-a-cpu'
 // SPARC: note: valid target CPU values are: v8, supersparc, sparclite, f934,
Index: test/Frontend/x86-target-cpu.c
===================================================================
--- test/Frontend/x86-target-cpu.c
+++ test/Frontend/x86-target-cpu.c
@@ -35,5 +35,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu btver1 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu btver2 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver1 -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s
 //
 // expected-no-diagnostics
Index: test/Driver/x86-march.c
===================================================================
--- test/Driver/x86-march.c
+++ test/Driver/x86-march.c
@@ -159,3 +159,7 @@
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver1 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=znver1
 // znver1: "-target-cpu" "znver1"
+//
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver2 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=znver2
+// znver2: "-target-cpu" "znver2"
Index: test/CodeGen/target-builtin-noerror.c
===================================================================
--- test/CodeGen/target-builtin-noerror.c
+++ test/CodeGen/target-builtin-noerror.c
@@ -120,4 +120,5 @@
   (void)__builtin_cpu_is("tremont");
   (void)__builtin_cpu_is("westmere");
   (void)__builtin_cpu_is("znver1");
+  (void)__builtin_cpu_is("znver2");
 }
Index: test/CodeGen/attr-target-mv.c
===================================================================
--- test/CodeGen/attr-target-mv.c
+++ test/CodeGen/attr-target-mv.c
@@ -130,7 +130,7 @@
 // LINUX: and i32 %{{.*}}, 4352
 // LINUX: icmp eq i32 %{{.*}}, 4352
 // LINUX: ret void (i32, double)* @foo_multi.fma4_sse4.2
-// LINUX: icmp eq i32 %{{.*}}, 12
+// LINUX: icmp eq i32 %{{.*}}, 13
 // LINUX: and i32 %{{.*}}, 4352
 // LINUX: icmp eq i32 %{{.*}}, 4352
 // LINUX: ret void (i32, double)* @foo_multi.arch_ivybridge_fma4_sse4.2
@@ -144,7 +144,7 @@
 // WINDOWS: icmp eq i32 %{{.*}}, 4352
 // WINDOWS: call void @foo_multi.fma4_sse4.2(i32 %0, double %1)
 // WINDOWS-NEXT: ret void
-// WINDOWS: icmp eq i32 %{{.*}}, 12
+// WINDOWS: icmp eq i32 %{{.*}}, 13
 // WINDOWS: and i32 %{{.*}}, 4352
 // WINDOWS: icmp eq i32 %{{.*}}, 4352
 // WINDOWS: call void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %0, double %1)
Index: lib/Basic/Targets/X86.cpp
===================================================================
--- lib/Basic/Targets/X86.cpp
+++ lib/Basic/Targets/X86.cpp
@@ -347,6 +347,11 @@
     setFeatureEnabledImpl(Features, "sahf", true);
     break;

+  case CK_ZNVER2:
+    setFeatureEnabledImpl(Features, "clwb", true);
+    setFeatureEnabledImpl(Features, "rdpid", true);
+    setFeatureEnabledImpl(Features, "wbnoinvd", true);
+    LLVM_FALLTHROUGH;
   case CK_ZNVER1:
     setFeatureEnabledImpl(Features, "adx", true);
     setFeatureEnabledImpl(Features, "aes", true);
@@ -864,14 +869,14 @@
 /// definitions for this particular subtarget.
 void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
                                      MacroBuilder &Builder) const {
-  // Inline assembly supports X86 flag outputs.
+  // Inline assembly supports X86 flag outputs.
   Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");

   std::string CodeModel = getTargetOpts().CodeModel;
   if (CodeModel == "default")
     CodeModel = "small";
   Builder.defineMacro("__code_model_" + CodeModel + "_");

   // Target identification.
   if (getTriple().getArch() == llvm::Triple::x86_64) {
     Builder.defineMacro("__amd64__");
@@ -1030,73 +1035,76 @@
   case CK_ZNVER1:
     defineCPUMacros(Builder, "znver1");
     break;
+  case CK_ZNVER2:
+    defineCPUMacros(Builder, "znver2");
+    break;
   case CK_Geode:
     defineCPUMacros(Builder, "geode");
     break;
   }

   // Target properties.
   Builder.defineMacro("__REGISTER_PREFIX__", "");

   // Define __NO_MATH_INLINES on linux/x86 so that we don't get inline
   // functions in glibc header files that use FP Stack inline asm which the
   // backend can't deal with (PR879).
   Builder.defineMacro("__NO_MATH_INLINES");

   if (HasAES)
     Builder.defineMacro("__AES__");

   if (HasVAES)
     Builder.defineMacro("__VAES__");

   if (HasPCLMUL)
     Builder.defineMacro("__PCLMUL__");

   if (HasVPCLMULQDQ)
     Builder.defineMacro("__VPCLMULQDQ__");

   if (HasLZCNT)
     Builder.defineMacro("__LZCNT__");

   if (HasRDRND)
     Builder.defineMacro("__RDRND__");

   if (HasFSGSBASE)
     Builder.defineMacro("__FSGSBASE__");

   if (HasBMI)
     Builder.defineMacro("__BMI__");

   if (HasBMI2)
     Builder.defineMacro("__BMI2__");

   if (HasPOPCNT)
     Builder.defineMacro("__POPCNT__");

   if (HasRTM)
     Builder.defineMacro("__RTM__");

   if (HasPRFCHW)
     Builder.defineMacro("__PRFCHW__");

   if (HasRDSEED)
     Builder.defineMacro("__RDSEED__");

   if (HasADX)
     Builder.defineMacro("__ADX__");

   if (HasTBM)
     Builder.defineMacro("__TBM__");

   if (HasLWP)
     Builder.defineMacro("__LWP__");

   if (HasMWAITX)
     Builder.defineMacro("__MWAITX__");

   if (HasMOVBE)
     Builder.defineMacro("__MOVBE__");

   switch (XOPLevel) {
   case XOP:
     Builder.defineMacro("__XOP__");
Index: include/clang/Basic/X86Target.def
===================================================================
--- include/clang/Basic/X86Target.def
+++ include/clang/Basic/X86Target.def
@@ -236,21 +236,22 @@
 /// Zen architecture processors.
 //@{
 PROC_WITH_FEAT(ZNVER1, "znver1", PROC_64_BIT, FEATURE_AVX2)
+PROC_WITH_FEAT(ZNVER2, "znver2", PROC_64_BIT, FEATURE_AVX2)
 //@}

 /// This specification is deprecated and will be removed in the future.
 /// Users should prefer K8.
 // FIXME: Warn on this when the CPU is set to it.
 //@{
 PROC(x86_64, "x86-64", PROC_64_BIT)
 //@}

 /// \name Geode
 /// Geode processors.
 //@{
 PROC(Geode, "geode", PROC_32_BIT)
 //@}

 // List of CPU Supports features in order.  These need to remain in the order
 // required by attribute 'target' checking.  Note that not all are supported/
 // prioritized by GCC, so synchronization with GCC's implementation may require
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D58344: E... Ganesh Gopalasubramanian via Phabricator via cfe-commits

Reply via email to