jhuber6 updated this revision to Diff 402925.
jhuber6 added a comment.

Squash other uncommitted changes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117048/new/

https://reviews.llvm.org/D117048

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -68,9 +68,14 @@
 
 static cl::opt<std::string> OptLevel("opt-level",
                                      cl::desc("Optimization level for LTO"),
-                                     cl::init("O0"),
+                                     cl::init("O2"),
                                      cl::cat(ClangLinkerWrapperCategory));
 
+static cl::opt<std::string>
+    BitcodeLibrary("target-library",
+                   cl::desc("Path for the target bitcode library"),
+                   cl::cat(ClangLinkerWrapperCategory));
+
 // Do not parse linker options.
 static cl::list<std::string>
     HostLinkerArgs(cl::Sink, cl::desc("<options to be passed to linker>..."));
@@ -201,7 +206,7 @@
       std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
       std::copy(Contents->begin(), Contents->end(), Output->getBufferStart());
       if (Error E = Output->commit())
-        return E;
+        return std::move(E);
 
       DeviceFiles.emplace_back(DeviceTriple, Arch, TempFile);
       ToBeStripped.push_back(*Name);
@@ -229,7 +234,7 @@
     std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
     std::copy(Contents.begin(), Contents.end(), Output->getBufferStart());
     if (Error E = Output->commit())
-      return E;
+      return std::move(E);
     StripFile = TempFile;
   }
 
@@ -318,7 +323,7 @@
     std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
     std::copy(Contents.begin(), Contents.end(), Output->getBufferStart());
     if (Error E = Output->commit())
-      return E;
+      return std::move(E);
 
     DeviceFiles.emplace_back(DeviceTriple, Arch, TempFile);
     ToBeDeleted.push_back(&GV);
@@ -329,7 +334,7 @@
 
   // We need to materialize the lazy module before we make any changes.
   if (Error Err = M->materializeAll())
-    return Err;
+    return std::move(Err);
 
   // Remove the global from the module and write it to a new file.
   for (GlobalVariable *GV : ToBeDeleted) {
@@ -403,7 +408,7 @@
   }
 
   if (Err)
-    return Err;
+    return std::move(Err);
 
   if (!NewMembers)
     return None;
@@ -417,9 +422,9 @@
 
   std::unique_ptr<MemoryBuffer> Buffer =
       MemoryBuffer::getMemBuffer(Library.getMemoryBufferRef(), false);
-  if (Error WriteErr = writeArchive(TempFile, Members, true, Library.kind(),
+  if (Error Err = writeArchive(TempFile, Members, true, Library.kind(),
                                     true, Library.isThin(), std::move(Buffer)))
-    return WriteErr;
+    return std::move(Err);
 
   return static_cast<std::string>(TempFile);
 }
@@ -740,7 +745,7 @@
 
     // Add the bitcode file with its resolved symbols to the LTO job.
     if (Error Err = LTOBackend->add(std::move(BitcodeFile), Resolutions))
-      return Err;
+      return std::move(Err);
   }
 
   // Run the LTO job to compile the bitcode.
@@ -758,7 +763,7 @@
         std::make_unique<llvm::raw_fd_ostream>(FD, true));
   };
   if (Error Err = LTOBackend->run(AddStream))
-    return Err;
+    return std::move(Err);
 
   for (auto &File : Files) {
     if (!TheTriple.isNVPTX())
@@ -976,6 +981,17 @@
     }
   }
 
+  // Add the device bitcode library to the device files if it was passed in.
+  if (!BitcodeLibrary.empty()) {
+    // FIXME: Hacky workaround; raise O0 to O1 to avoid a backend crash at O0.
+    if (OptLevel[1] - '0' == 0)
+      OptLevel[1] = '1';
+    auto DeviceAndPath = StringRef(BitcodeLibrary).split('=');
+    auto TripleAndArch = DeviceAndPath.first.rsplit('-');
+    DeviceFiles.emplace_back(TripleAndArch.first, TripleAndArch.second,
+                             DeviceAndPath.second);
+  }
+
   // Link the device images extracted from the linker input.
   SmallVector<std::string, 16> LinkedImages;
   if (Error Err = linkDeviceFiles(DeviceFiles, LinkerArgs, LinkedImages))
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -744,6 +744,10 @@
       return;
     }
 
+    // Link the bitcode library late if we're using device LTO.
+    if (getDriver().isUsingLTO(/* IsOffload */ true))
+      return;
+
     std::string BitcodeSuffix;
     if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
                            options::OPT_fno_openmp_target_new_runtime, true))
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8165,6 +8165,34 @@
           "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1)));
     }
 
+    // Pass in the bitcode library to be linked during LTO.
+    for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
+         ++TI) {
+      const ToolChain *TC = TI->second;
+      const Driver &D = TC->getDriver();
+      const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
+      StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ);
+
+      std::string BitcodeSuffix;
+      if (TCArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
+                         options::OPT_fno_openmp_target_new_runtime, true))
+        BitcodeSuffix += "new-";
+      if (TC->getTriple().isNVPTX())
+        BitcodeSuffix += "nvptx-";
+      else if (TC->getTriple().isAMDGPU())
+        BitcodeSuffix += "amdgpu-";
+      BitcodeSuffix += Arch;
+
+      ArgStringList BitcodeLibrary;
+      addOpenMPDeviceRTL(D, TCArgs, BitcodeLibrary, BitcodeSuffix,
+                         TC->getTriple());
+
+      if (!BitcodeLibrary.empty())
+        CmdArgs.push_back(
+            Args.MakeArgString("-target-library=" + TC->getTripleString() +
+                               "-" + Arch + "=" + BitcodeLibrary.back()));
+    }
+
     // Pass in the optimization level to use for LTO.
     if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
       StringRef OOpt;
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -285,6 +285,10 @@
   if (DriverArgs.hasArg(options::OPT_nogpulib))
     return;
 
+  // Link the bitcode library late if we're using device LTO.
+  if (getDriver().isUsingLTO(/* IsOffload */ true))
+    return;
+
   std::string BitcodeSuffix;
   if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
                          options::OPT_fno_openmp_target_new_runtime, true))
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to