[PATCH] D130096: [Clang][AMDGPU] Emit AMDGPU library control constants in clang

Joseph Huber via Phabricator via cfe-commits Mon, 29 Aug 2022 19:03:27 -0700

jhuber6 updated this revision to Diff 456520.
jhuber6 added a comment.

Remove unused code gen option.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D130096/new/

https://reviews.llvm.org/D130096

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/CodeGen/TargetInfo.h
  clang/test/CodeGen/amdgcn-control-constants.c

Index: clang/test/CodeGen/amdgcn-control-constants.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/amdgcn-control-constants.c
@@ -0,0 +1,49 @@
+// Check that we generate all the expected default features for the target.
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck %s --check-prefix=GFX90A
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1030 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=GFX1030
+
+// GFX90A: @__oclc_wavefrontsize64 = private local_unnamed_addr addrspace(4) constant i8 1
+// GFX90A: @__oclc_daz_opt = private local_unnamed_addr addrspace(4) constant i8 0
+// GFX90A: @__oclc_finite_only_opt = private local_unnamed_addr addrspace(4) constant i8 0
+// GFX90A: @__oclc_unsafe_math_opt = private local_unnamed_addr addrspace(4) constant i8 0
+// GFX90A: @__oclc_correctly_rounded_sqrt32 = private local_unnamed_addr addrspace(4) constant i8 1
+// GFX90A: @__oclc_ISA_version = private local_unnamed_addr addrspace(4) constant i32 9010
+// GFX90A: @__oclc_ABI_version = private local_unnamed_addr addrspace(4) constant i32 400
+
+// GFX1030: @__oclc_wavefrontsize64 = private local_unnamed_addr addrspace(4) constant i8 0
+// GFX1030: @__oclc_daz_opt = private local_unnamed_addr addrspace(4) constant i8 0
+// GFX1030: @__oclc_finite_only_opt = private local_unnamed_addr addrspace(4) constant i8 0
+// GFX1030: @__oclc_unsafe_math_opt = private local_unnamed_addr addrspace(4) constant i8 0
+// GFX1030: @__oclc_correctly_rounded_sqrt32 = private local_unnamed_addr addrspace(4) constant i8 1
+// GFX1030: @__oclc_ISA_version = private local_unnamed_addr addrspace(4) constant i32 10048
+// GFX1030: @__oclc_ABI_version = private local_unnamed_addr addrspace(4) constant i32 400
+
+// Check that we can override the wavefront features.
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1030 -target-feature +wavefrontsize64 \
+// RUN:   -S -emit-llvm -o - %s | FileCheck %s --check-prefix=WAVEFRONT
+// WAVEFRONT: @__oclc_wavefrontsize64 = private local_unnamed_addr addrspace(4) constant i8 1
+
+// Check that we can enable the denormals-are-zero (DAZ) option.
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -fdenormal-fp-math-f32=preserve-sign,preserve-sign \
+// RUN:   -S -emit-llvm -o - %s | FileCheck %s --check-prefix=DENORM-AT-ZERO
+// DENORM-AT-ZERO: @__oclc_daz_opt = private local_unnamed_addr addrspace(4) constant i8 1
+
+// Check that we can enable finite math.
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -ffinite-math-only \
+// RUN:   -S -emit-llvm -o - %s | FileCheck %s --check-prefix=FINITE-MATH
+// FINITE-MATH: @__oclc_finite_only_opt = private local_unnamed_addr addrspace(4) constant i8 1
+// FINITE-MATH: @__oclc_unsafe_math_opt = private local_unnamed_addr addrspace(4) constant i8 0
+
+// Check that we can enable unsafe math.
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -menable-unsafe-fp-math \
+// RUN:   -S -emit-llvm -o - %s | FileCheck %s --check-prefix=UNSAFE-MATH
+// UNSAFE-MATH: @__oclc_finite_only_opt = private local_unnamed_addr addrspace(4) constant i8 0
+// UNSAFE-MATH: @__oclc_unsafe_math_opt = private local_unnamed_addr addrspace(4) constant i8 1
+
+// Check that we can disable/enable correctly rounded square roots.
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -fno-hip-fp32-correctly-rounded-divide-sqrt \
+// RUN:   -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CORRECT-SQRT
+// CORRECT-SQRT: @__oclc_correctly_rounded_sqrt32 = private local_unnamed_addr addrspace(4) constant i8 0
+// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -target-cpu gfx90a -cl-fp32-correctly-rounded-divide-sqrt \
+// RUN:   -disable-llvm-optzns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CL-CORRECT-SQRT
+// CL-CORRECT-SQRT: @__oclc_correctly_rounded_sqrt32 = private local_unnamed_addr addrspace(4) constant i8 1
Index: clang/lib/CodeGen/TargetInfo.h
===================================================================
--- clang/lib/CodeGen/TargetInfo.h
+++ clang/lib/CodeGen/TargetInfo.h
@@ -76,6 +76,9 @@
       CodeGen::CodeGenModule &CGM,
       const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {}
 
+  /// Hook for emitting extra target-specific globals; does nothing by default.
+  virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {}
+
   /// Any further codegen related checks that need to be done on a function call
   /// in a target specific manner.
   virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/IntrinsicsS390.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetParser.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
@@ -9288,6 +9289,8 @@
   void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                  CodeGenModule &CGM) const;
 
+  void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
+
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
@@ -9403,6 +9406,74 @@
   }
 }
 
+/// Emits the __oclc_* control constants used to change per-architecture and
+/// per-option behaviour in the AMDGPU ROCm device libraries.
+void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
+    CodeGen::CodeGenModule &CGM) const {
+  if (!CGM.getTriple().isAMDGCN())
+    return;
+  StringRef CPU = CGM.getTarget().getTargetOpts().CPU;
+  llvm::AMDGPU::GPUKind Kind = llvm::AMDGPU::parseArchAMDGCN(CPU);
+  unsigned Features = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+  if (Kind == llvm::AMDGPU::GK_NONE)
+    return;
+
+  unsigned Minor; // Last two characters of the CPU name, parsed as hex.
+  unsigned Major; // Digits before those two characters, parsed as decimal.
+  StringRef Identifier = CPU.drop_while([](char C) { return !isDigit(C); });
+  if (Identifier.take_back(2).getAsInteger(16, Minor) ||
+      Identifier.drop_back(2).getAsInteger(10, Major))
+    return;
+
+  auto AddGlobal = [&](StringRef Name, unsigned Value, unsigned Size) {
+    if (CGM.getModule().getNamedGlobal(Name))
+      return; // A global with this name already exists; do not redefine it.
+
+    auto *Type =
+        llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), Size);
+    auto *GV = new llvm::GlobalVariable(
+        CGM.getModule(), Type, /*isConstant=*/true,
+        llvm::GlobalValue::LinkageTypes::PrivateLinkage,
+        llvm::ConstantInt::get(Type, Value), Name, /*InsertBefore=*/nullptr,
+        llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
+        CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
+    GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
+    GV->setAlignment(CGM.getDataLayout().getABITypeAlign(Type));
+  };
+
+  // The wavefront size is 64 unless the target has FEATURE_WAVE32, or if the
+  // user explicitly requested "+wavefrontsize64".
+  bool Wavefront64 =
+      !(Features & llvm::AMDGPU::FEATURE_WAVE32) ||
+      llvm::is_contained(CGM.getTarget().getTargetOpts().FeaturesAsWritten,
+                         "+wavefrontsize64");
+
+  // Math flags taken from the language and code generation options.
+  bool RelaxedMath = CGM.getLangOpts().FastMath;
+  bool UnsafeMath = CGM.getLangOpts().UnsafeFPMath;
+  bool DenormAtZero = CGM.getCodeGenOpts().FP32DenormalMode ==
+                      llvm::DenormalMode::getPreserveSign();
+  bool FiniteOnly =
+      CGM.getLangOpts().NoHonorInfs || CGM.getLangOpts().NoHonorNaNs;
+
+  // Use the OpenCL correctly rounded div/sqrt option for OpenCL, else HIP's.
+  bool CorrectSqrt = CGM.getLangOpts().OpenCL
+                         ? CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt
+                         : CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt;
+
+  // Control constants for the wavefront size and math operations.
+  AddGlobal("__oclc_wavefrontsize64", Wavefront64, /*Size=*/8);
+  AddGlobal("__oclc_daz_opt", DenormAtZero, /*Size=*/8);
+  AddGlobal("__oclc_finite_only_opt", FiniteOnly || RelaxedMath, /*Size=*/8);
+  AddGlobal("__oclc_unsafe_math_opt", UnsafeMath || RelaxedMath, /*Size=*/8);
+  AddGlobal("__oclc_correctly_rounded_sqrt32", CorrectSqrt, /*Size=*/8);
+
+  // Control constants for the ISA version and code object (ABI) version.
+  AddGlobal("__oclc_ISA_version", Minor + Major * 1000, /*Size=*/32);
+  AddGlobal("__oclc_ABI_version",
+            CGM.getTarget().getTargetOpts().CodeObjectVersion, /*Size=*/32);
+}
+
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
   if (requiresAMDGPUProtectedVisibility(D, GV)) {
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -936,6 +936,7 @@
   if (getCodeGenOpts().SkipRaxSetup)
     getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1);
 
+  getTargetCodeGenInfo().emitTargetGlobals(*this);
   getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames);
 
   EmitBackendOptionsMetadata(getCodeGenOpts());

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D130096: [Clang][AMDGPU] Emit AMDGPU library control constants in clang

Reply via email to