jhuber6 updated this revision to Diff 518604.
jhuber6 added a comment.

Add option to allow overriding the global hash.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149451/new/

https://reviews.llvm.org/D149451

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/Cuda.h
  clang/test/Driver/cuda-cross-compiling.c
  llvm/lib/Target/NVPTX/CMakeLists.txt
  llvm/lib/Target/NVPTX/NVPTX.h
  llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
  llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
  llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

Index: llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor \
+; RUN:     -nvptx-lower-global-ctor-dtor-id=unique_id < %s | FileCheck %s --check-prefix=GLOBAL
+
+; Make sure we get the same result if we run multiple times
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
+
+@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
+@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
+
+; CHECK-NOT: @llvm.global_ctors
+; CHECK-NOT: @llvm.global_dtors
+
+; CHECK: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; CHECK: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_[[HASH]]_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_[[HASH]]_1 to ptr)], section "llvm.metadata"
+; GLOBAL: @__init_array_object_foo_unique_id_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; GLOBAL: @__fini_array_object_bar_unique_id_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; GLOBAL: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_unique_id_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_unique_id_1 to ptr)], section "llvm.metadata"
+
+; VISIBILITY: .visible .const .align 8 .u64 __init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = foo;
+; VISIBILITY: .visible .const .align 8 .u64 __fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = bar;
+
+define internal void @foo() {
+  ret void
+}
+
+define internal void @bar() {
+  ret void
+}
Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "NVPTXAliasAnalysis.h"
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXAtomicLower.h"
+#include "NVPTXCtorDtorLowering.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXTargetObjectFile.h"
@@ -68,8 +69,10 @@
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXAtomicLowerPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
 void initializeNVVMIntrRangePass(PassRegistry &);
@@ -95,6 +98,7 @@
   initializeNVPTXAtomicLowerPass(PR);
   initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
+  initializeNVPTXCtorDtorLoweringLegacyPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
   initializeNVPTXProxyRegErasurePass(PR);
   initializeNVPTXDAGToDAGISelPass(PR);
@@ -249,6 +253,10 @@
   PB.registerPipelineParsingCallback(
       [](StringRef PassName, ModulePassManager &PM,
          ArrayRef<PassBuilder::PipelineElement>) {
+        if (PassName == "nvptx-lower-ctor-dtor") {
+          PM.addPass(NVPTXCtorDtorLoweringPass());
+          return true;
+        }
         if (PassName == "generic-to-nvvm") {
           PM.addPass(GenericToNVVMPass());
           return true;
@@ -369,6 +377,7 @@
   }
 
   addPass(createAtomicExpandPass());
+  addPass(createNVPTXCtorDtorLoweringLegacyPass());
 
   // === LSR and other generic IR passes ===
   TargetPassConfig::addIRPasses();
Index: llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
@@ -0,0 +1,30 @@
+//===-- NVPTXCtorDtorLowering.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+class PassRegistry;
+
+extern char &NVPTXCtorDtorLoweringLegacyPassID;
+extern void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
+
+/// Lower llvm.global_ctors and llvm.global_dtors to named constant globals.
+class NVPTXCtorDtorLoweringPass
+    : public PassInfoMixin<NVPTXCtorDtorLoweringPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
Index: llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
@@ -0,0 +1,116 @@
+//===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This pass lowers global ctors and dtors to named globals for the runtime.
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXCtorDtorLowering.h"
+#include "NVPTX.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-lower-ctor-dtor"
+
+static cl::opt<std::string>
+    GlobalStr("nvptx-lower-global-ctor-dtor-id",
+              cl::desc("Override the name of ctor/dtor globals."), cl::init(""),
+              cl::Hidden);
+
+namespace {
+
+static std::string getHash(StringRef Str) {
+  llvm::MD5 Hasher;
+  llvm::MD5::MD5Result Hash;
+  Hasher.update(Str);
+  Hasher.final(Hash);
+  return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
+}
+
+/// Lower the structor list \p GlobalName to named globals; true if changed.
+static bool createInitOrFiniGlobls(Module &M, StringRef GlobalName,
+                                   bool IsCtor) {
+  GlobalVariable *GV = M.getGlobalVariable(GlobalName);
+  if (!GV || !GV->hasInitializer())
+    return false;
+  ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!GA || GA->getNumOperands() == 0)
+    return false;
+
+  // The semi-unique ID is identical for every entry, so compute it only once.
+  const std::string GlobalID =
+      !GlobalStr.empty() ? GlobalStr : getHash(M.getSourceFileName());
+  // NVPTX has no way to emit variables at specific sections or support for
+  // the traditional constructor sections. Instead, we emit mangled global
+  // names so the runtime can build the list manually.
+  for (Value *V : GA->operands()) {
+    auto *CS = cast<ConstantStruct>(V);
+    auto *F = cast<Constant>(CS->getOperand(1));
+    // Priorities are unsigned; zero-extend rather than sign-extend them.
+    uint64_t Priority = cast<ConstantInt>(CS->getOperand(0))->getZExtValue();
+    std::string PriorityStr = std::to_string(Priority);
+    // We append the semi-unique ID and the priority to the global name.
+    std::string NameStr =
+        ((IsCtor ? "__init_array_object_" : "__fini_array_object_") +
+         F->getName() + "_" + GlobalID + "_" + PriorityStr).str();
+    // PTX does not support exported names with '.' in them.
+    llvm::transform(NameStr, NameStr.begin(),
+                    [](char c) { return c == '.' ? '_' : c; });
+
+    auto *Entry = new GlobalVariable(
+        M, F->getType(), /*IsConstant=*/true, GlobalValue::ExternalLinkage, F,
+        NameStr, nullptr, GlobalValue::NotThreadLocal, /*AddressSpace=*/4);
+    // This isn't respected by Nvidia, simply put here for clarity.
+    Entry->setSection((IsCtor ? ".init_array." : ".fini_array.") + PriorityStr);
+    Entry->setVisibility(GlobalVariable::ProtectedVisibility);
+    appendToUsed(M, {Entry});
+  }
+
+  GV->eraseFromParent();
+  return true;
+}
+
+/// Lower both structor lists in \p M; true if either one was rewritten.
+static bool lowerCtorsAndDtors(Module &M) {
+  bool Ctors = createInitOrFiniGlobls(M, "llvm.global_ctors", /*IsCtor=*/true);
+  bool Dtors = createInitOrFiniGlobls(M, "llvm.global_dtors", /*IsCtor=*/false);
+  return Ctors || Dtors;
+}
+
+class NVPTXCtorDtorLoweringLegacy final : public ModulePass {
+public:
+  static char ID;
+  NVPTXCtorDtorLoweringLegacy() : ModulePass(ID) {}
+  bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); }
+};
+
+} // End anonymous namespace
+
+PreservedAnalyses NVPTXCtorDtorLoweringPass::run(Module &M,
+                                                 ModuleAnalysisManager &AM) {
+  const bool Changed = lowerCtorsAndDtors(M); // Analyses survive a no-op run.
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+char NVPTXCtorDtorLoweringLegacy::ID = 0;
+char &llvm::NVPTXCtorDtorLoweringLegacyPassID = NVPTXCtorDtorLoweringLegacy::ID;
+INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy, DEBUG_TYPE,
+                "Lower ctors and dtors for NVPTX", false, false)
+
+ModulePass *llvm::createNVPTXCtorDtorLoweringLegacyPass() {
+  return new NVPTXCtorDtorLoweringLegacy();
+}
Index: llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -92,6 +92,11 @@
 
 using namespace llvm;
 
+static cl::opt<bool>
+    LowerCtorDtor("nvptx-lower-global-ctor-dtor",
+                  cl::desc("Lower GPU ctor / dtors to globals on the device."),
+                  cl::init(false), cl::Hidden);
+
 #define DEPOTNAME "__local_depot"
 
 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
@@ -788,12 +793,14 @@
     report_fatal_error("Module has aliases, which NVPTX does not support.");
     return true; // error
   }
-  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
+  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
+      !LowerCtorDtor) {
     report_fatal_error(
         "Module has a nontrivial global ctor, which NVPTX does not support.");
     return true;  // error
   }
-  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
+  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
+      !LowerCtorDtor) {
     report_fatal_error(
         "Module has a nontrivial global dtor, which NVPTX does not support.");
     return true;  // error
Index: llvm/lib/Target/NVPTX/NVPTX.h
===================================================================
--- llvm/lib/Target/NVPTX/NVPTX.h
+++ llvm/lib/Target/NVPTX/NVPTX.h
@@ -39,6 +39,7 @@
                                  llvm::CodeGenOpt::Level OptLevel);
 ModulePass *createNVPTXAssignValidGlobalNamesPass();
 ModulePass *createGenericToNVVMLegacyPass();
+ModulePass *createNVPTXCtorDtorLoweringLegacyPass();
 FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
 FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
 MachineFunctionPass *createNVPTXPrologEpilogPass();
Index: llvm/lib/Target/NVPTX/CMakeLists.txt
===================================================================
--- llvm/lib/Target/NVPTX/CMakeLists.txt
+++ llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -37,6 +37,7 @@
   NVVMIntrRange.cpp
   NVVMReflect.cpp
   NVPTXProxyRegErasure.cpp
+  NVPTXCtorDtorLowering.cpp
   )
 
 add_llvm_target(NVPTXCodeGen
Index: clang/test/Driver/cuda-cross-compiling.c
===================================================================
--- clang/test/Driver/cuda-cross-compiling.c
+++ clang/test/Driver/cuda-cross-compiling.c
@@ -68,3 +68,12 @@
 //      DEFAULT: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.*}} "-target-cpu" "sm_35" "-target-feature" "+ptx{{[0-9]+}}" {{.*}} "-o" "[[PTX:.+]].s"
 // DEFAULT-NEXT: ptxas{{.*}}"-m64" "-O0" "--gpu-name" "sm_35" "--output-file" "[[CUBIN:.+]].cubin" "[[PTX]].s" "-c"
 // DEFAULT-NEXT: nvlink{{.*}}"-o" "a.out" "-arch" "sm_35" {{.*}} "[[CUBIN]].cubin"
+
+//
+// Test to ensure that we enable handling global constructors in a freestanding
+// Nvidia compilation.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 %s -### 2>&1 \
+// RUN:   | FileCheck -check-prefix=LOWERING %s
+
+// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
Index: clang/lib/Driver/ToolChains/Cuda.h
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.h
+++ clang/lib/Driver/ToolChains/Cuda.h
@@ -132,8 +132,8 @@
 class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
 public:
   NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
-                 const llvm::Triple &HostTriple,
-                 const llvm::opt::ArgList &Args);
+                 const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args,
+                 bool Freestanding);
 
   NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                  const llvm::opt::ArgList &Args);
@@ -142,6 +142,11 @@
   TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
                 Action::OffloadKind DeviceOffloadKind) const override;
 
+  void
+  addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+                        llvm::opt::ArgStringList &CC1Args,
+                        Action::OffloadKind DeviceOffloadKind) const override;
+
   // Never try to use the integrated assembler with CUDA; always fork out to
   // ptxas.
   bool useIntegratedAs() const override { return false; }
@@ -168,6 +173,9 @@
 protected:
   Tool *buildAssembler() const override; // ptxas.
   Tool *buildLinker() const override;    // nvlink.
+
+private:
+  bool Freestanding = false;
 };
 
 class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -695,8 +695,9 @@
 /// toolchain.
 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                                const llvm::Triple &HostTriple,
-                               const ArgList &Args)
-    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
+                               const ArgList &Args, bool Freestanding = false)
+    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
+      Freestanding(Freestanding) {
   if (CudaInstallation.isValid()) {
     CudaInstallation.WarnIfUnsupportedVersion();
     getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
@@ -711,7 +712,8 @@
 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                                const ArgList &Args)
     : NVPTXToolChain(D, Triple,
-                     llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args) {}
+                     llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args,
+                     /*Freestanding=*/true) {}
 
 llvm::opt::DerivedArgList *
 NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
@@ -735,6 +737,16 @@
   return DAL;
 }
 
+void NVPTXToolChain::addClangTargetOptions(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadingKind) const {
+  if (!Freestanding)
+    return;
+  // A standalone NVPTX toolchain mimics a standard environment as much as
+  // possible, so ctor / dtor functions become registrable global symbols.
+  CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
+}
+
 bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
   const Option &O = A->getOption();
   return (O.matches(options::OPT_gN_Group) &&
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to