jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, gregrodgers, JonChesterfield, ronlieb.
Herald added subscribers: guansong, hiraditya, inglorion, yaxunl, mgorny.
jhuber6 requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, sstefan1.
Herald added projects: clang, LLVM.

This patch implements the first support for handling LTO in the
offloading pipeline. The flag `-foffload-lto` controls whether
bitcode is embedded into the device. If bitcode is found in the device,
the extracted files will be sent to the LTO pipeline to be linked and
sent to the backend. This implementation does not separately link the
device bitcode libraries yet.

Depends on D116675 <https://reviews.llvm.org/D116675>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D116975

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/tools/clang-linker-wrapper/CMakeLists.txt
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  llvm/lib/Passes/PassBuilderPipelines.cpp

Index: llvm/lib/Passes/PassBuilderPipelines.cpp
===================================================================
--- llvm/lib/Passes/PassBuilderPipelines.cpp
+++ llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1464,6 +1464,9 @@
   // libraries and other oracles.
   MPM.addPass(InferFunctionAttrsPass());
 
+  if (Level.getSpeedupLevel() > 1)
+    MPM.addPass(OpenMPOptPass());
+
   if (Level.getSpeedupLevel() > 1) {
     FunctionPassManager EarlyFPM;
     EarlyFPM.addPass(CallSiteSplittingPass());
@@ -1773,6 +1776,7 @@
   MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
   CGSCCPassManager CGPM;
   CGPM.addPass(CoroSplitPass());
+  CGPM.addPass(OpenMPOptCGSCCPass());
   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
   MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
 
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -17,9 +17,12 @@
 #include "clang/Basic/Version.h"
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/CodeGen/CommandFlags.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IRReader/IRReader.h"
+#include "llvm/LTO/LTO.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ArchiveWriter.h"
 #include "llvm/Object/Binary.h"
@@ -36,6 +39,7 @@
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/StringSaver.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -58,6 +62,15 @@
                                            cl::desc("Path of linker binary"),
                                            cl::cat(ClangLinkerWrapperCategory));
 
+static cl::opt<std::string>
+    TargetFeatures("target-feature", cl::desc("Target features for triple"),
+                   cl::cat(ClangLinkerWrapperCategory));
+
+static cl::opt<std::string> OptLevel("opt-level",
+                                     cl::desc("Optimization level for LTO"),
+                                     cl::init("O0"),
+                                     cl::cat(ClangLinkerWrapperCategory));
+
 // Do not parse linker options.
 static cl::list<std::string>
     HostLinkerArgs(cl::Sink, cl::desc("<options to be passed to linker>..."));
@@ -68,6 +81,9 @@
 /// Temporary files created by the linker wrapper.
 static SmallVector<std::string, 16> TempFiles;
 
+/// Codegen flags for LTO backend.
+static codegen::RegisterCodeGenFlags CodeGenFlags;
+
 /// Magic section string that marks the existence of offloading data. The
 /// section string will be formatted as `.llvm.offloading.<triple>.<arch>`.
 #define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading."
@@ -195,6 +211,28 @@
   if (ToBeStripped.empty())
     return None;
 
+  // If the object file to strip doesn't exist we need to write it so we can
+  // pass it to llvm-strip.
+  SmallString<128> StripFile = Obj.getFileName();
+  if (!sys::fs::exists(StripFile)) {
+    SmallString<128> TempFile;
+    if (std::error_code EC = sys::fs::createTemporaryFile(
+            sys::path::stem(StripFile), "o", TempFile))
+      return createFileError(TempFile, EC);
+    TempFiles.push_back(static_cast<std::string>(TempFile));
+
+    auto Contents = Obj.getMemoryBufferRef().getBuffer();
+    Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
+        FileOutputBuffer::create(TempFile, Contents.size());
+    if (!OutputOrErr)
+      return OutputOrErr.takeError();
+    std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
+    std::copy(Contents.begin(), Contents.end(), Output->getBufferStart());
+    if (Error E = Output->commit())
+      return E;
+    StripFile = TempFile;
+  }
+
   // We will use llvm-strip to remove the now unneeded section containing the
   // offloading code.
   ErrorOr<std::string> StripPath = sys::findProgramByName(
@@ -214,7 +252,7 @@
   SmallVector<StringRef, 8> StripArgs;
   StripArgs.push_back(*StripPath);
   StripArgs.push_back("--no-strip-all");
-  StripArgs.push_back(Obj.getFileName());
+  StripArgs.push_back(StripFile);
   for (auto &Section : ToBeStripped) {
     StripArgs.push_back("--remove-section");
     StripArgs.push_back(Section);
@@ -419,6 +457,44 @@
 
 // TODO: Move these to a separate file.
 namespace nvptx {
+Expected<std::string> assemble(StringRef InputFile, Triple TheTriple,
+                               StringRef Arch) {
+  // NVPTX uses the ptxas binary to assemble the device code into a cubin.
+  ErrorOr<std::string> PtxasPath =
+      sys::findProgramByName("ptxas", sys::path::parent_path(LinkerExecutable));
+  if (!PtxasPath)
+    PtxasPath = sys::findProgramByName("ptxas");
+  if (!PtxasPath)
+    return createStringError(PtxasPath.getError(),
+                             "Unable to find 'ptxas' in path");
+
+  // Create a new file to write the assembled device image to.
+  SmallString<128> TempFile;
+  if (std::error_code EC = sys::fs::createTemporaryFile(
+          TheTriple.getArchName() + "-" + Arch, "cubin", TempFile))
+    return createFileError(TempFile, EC);
+  TempFiles.push_back(static_cast<std::string>(TempFile));
+
+  // TODO: Pass in arguments like `-g` and `-v` from the driver.
+  SmallVector<StringRef, 16> CmdArgs;
+  std::string Opt = "-" + OptLevel;
+  CmdArgs.push_back(*PtxasPath);
+  CmdArgs.push_back(TheTriple.isArch64Bit() ? "-m64" : "-m32");
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(TempFile);
+  CmdArgs.push_back(Opt);
+  CmdArgs.push_back("--gpu-name");
+  CmdArgs.push_back(Arch);
+  CmdArgs.push_back("-c");
+
+  CmdArgs.push_back(InputFile);
+
+  if (sys::ExecuteAndWait(*PtxasPath, CmdArgs))
+    return createStringError(inconvertibleErrorCode(), "'ptxas' failed");
+
+  return static_cast<std::string>(TempFile);
+}
+
 Expected<std::string> link(ArrayRef<StringRef> InputFiles,
                            ArrayRef<std::string> LinkerArgs, Triple TheTriple,
                            StringRef Arch) {
@@ -482,6 +558,221 @@
   }
 }
 
+/// Prints \p DI to stderr with a severity prefix, followed by a newline.
+void diagnosticHandler(const DiagnosticInfo &DI) {
+  std::string ErrStorage;
+  raw_string_ostream OS(ErrStorage);
+  DiagnosticPrinterRawOStream DP(OS);
+  DI.print(DP);
+  switch (DI.getSeverity()) {
+  case DS_Error:
+    WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n";
+    break;
+  case DS_Warning:
+    WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n";
+    break;
+  case DS_Note:
+    WithColor::note(errs(), LinkerExecutable) << ErrStorage << "\n";
+    break;
+  case DS_Remark:
+    WithColor::remark(errs(), LinkerExecutable) << ErrStorage << "\n";
+    break;
+  }
+}
+
+// Collects the features the driver passed as <triple>=<features>. The list is
+// empty unless the <triple> part matches \p TheTriple.
+std::vector<std::string> getTargetFeatures(const Triple &TheTriple) {
+  std::pair<StringRef, StringRef> Parts = StringRef(TargetFeatures).split('=');
+
+  std::vector<std::string> Result;
+  if (Parts.first == TheTriple.getTriple())
+    for (StringRef Entry : llvm::split(Parts.second, ','))
+      Result.push_back(Entry.str());
+  return Result;
+}
+
+// Maps a numeric optimization level in the range 0-3 onto the matching code
+// generation optimization level. Every other value is treated as unreachable.
+CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
+  if (OptLevel == 0)
+    return CodeGenOpt::None;
+  if (OptLevel == 1)
+    return CodeGenOpt::Less;
+  if (OptLevel == 2)
+    return CodeGenOpt::Default;
+  if (OptLevel == 3)
+    return CodeGenOpt::Aggressive;
+  llvm_unreachable("Invalid optimization level");
+}
+
+/// Builds the LTO instance that compiles device bitcode for \p TheTriple and
+/// \p Arch; \p WholeProgram becomes the whole-program visibility setting.
+std::unique_ptr<lto::LTO> createLTO(const Triple &TheTriple, StringRef Arch,
+                                    bool WholeProgram) {
+  // TODO: Handle index-only thin-LTO
+  lto::ThinBackend Backend = lto::createInProcessThinBackend(
+      llvm::heavyweight_hardware_concurrency(1));
+
+  // The option has the form `O<level>`, so the level is its second character.
+  const unsigned Level = OptLevel[1] - '0';
+  const bool Vectorize = Level > 1;
+
+  lto::Config Conf;
+  Conf.CPU = Arch.str();
+  Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
+  Conf.MAttrs = getTargetFeatures(TheTriple);
+  Conf.CGOptLevel = getCGOptLevel(Level);
+  Conf.OptLevel = Level;
+  Conf.DefaultTriple = TheTriple.getTriple();
+  Conf.DiagHandler = diagnosticHandler;
+  Conf.PTO.LoopVectorization = Vectorize;
+  Conf.PTO.SLPVectorization = Vectorize;
+
+  // TODO: Handle outputting bitcode using a module hook.
+  Conf.CGFileType = TheTriple.isNVPTX() ? CGFT_AssemblyFile : CGFT_ObjectFile;
+
+  // TODO: Handle remark files
+  Conf.HasWholeProgramVisibility = WholeProgram;
+
+  return std::make_unique<lto::LTO>(std::move(Conf), Backend);
+}
+
+// Tells whether \p S is spelled as a valid C language identifier; names of
+// that form are the ones given `__start_` and `__stop_` symbols.
+bool isValidCIdentifier(StringRef S) {
+  auto IsTail = [](char C) { return isAlnum(C) || C == '_'; };
+  bool HeadOk = !S.empty() && (S[0] == '_' || isAlpha(S[0]));
+  return HeadOk && std::all_of(S.begin() + 1, S.end(), IsTail);
+}
+
+Expected<Optional<std::string>> linkBitcodeFiles(ArrayRef<StringRef> InputFiles,
+                                                 const Triple &TheTriple,
+                                                 StringRef Arch) {
+  SmallVector<std::unique_ptr<MemoryBuffer>, 4> SavedBuffers;
+  SmallVector<std::unique_ptr<lto::InputFile>, 4> BitcodeFiles;
+  StringMap<bool> UsedInRegularObj;
+
+  // Search for bitcode files in the input and create an LTO input file. If it
+  // is not a bitcode file, scan its symbol table for symbols we need to
+  // save.
+  for (StringRef File : InputFiles) {
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+        MemoryBuffer::getFileOrSTDIN(File);
+    if (std::error_code EC = BufferOrErr.getError())
+      return createFileError(File, EC);
+
+    file_magic Type = identify_magic((*BufferOrErr)->getBuffer());
+    if (Type != file_magic::bitcode) {
+      Expected<std::unique_ptr<ObjectFile>> ObjFile =
+          ObjectFile::createObjectFile(**BufferOrErr, Type);
+      if (!ObjFile)
+        return ObjFile.takeError();
+
+      for (auto &Sym : (*ObjFile)->symbols()) {
+        Expected<StringRef> Name = Sym.getName();
+        if (!Name)
+          return Name.takeError();
+
+        UsedInRegularObj[*Name] = true;
+      }
+    } else {
+      Expected<std::unique_ptr<lto::InputFile>> InputFileOrErr =
+          llvm::lto::InputFile::create(**BufferOrErr);
+      if (!InputFileOrErr)
+        return InputFileOrErr.takeError();
+
+      BitcodeFiles.push_back(std::move(*InputFileOrErr));
+      SavedBuffers.push_back(std::move(*BufferOrErr));
+    }
+  }
+
+  if (BitcodeFiles.empty())
+    return None;
+
+  // We have visibility of the whole program if every input is bitcode. All
+  // inputs are statically linked, so there should be no external references.
+  bool WholeProgram = BitcodeFiles.size() == InputFiles.size();
+  StringMap<bool> PrevailingSymbols;
+
+  // TODO: Run more tests to verify that this is correct.
+  // Create the LTO instance with the necessary config and add the bitcode files
+  // to it after resolving symbols. We make a few assumptions about symbol
+  // resolution.
+  // 1. The target is going to be a stand-alone executable file.
+  // 2. We do not support relocatable object files.
+  // 3. All inputs are relocatable object files extracted from host binaries, so
+  //    there is no resolution to a dynamic library.
+  auto LTOBackend = createLTO(TheTriple, Arch, WholeProgram);
+  for (auto &BitcodeFile : BitcodeFiles) {
+    const auto Symbols = BitcodeFile->symbols();
+    SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size());
+    size_t Idx = 0;
+    for (auto &Sym : Symbols) {
+      lto::SymbolResolution &Res = Resolutions[Idx++];
+
+      // We will use this as the prevailing symbol definition in LTO unless
+      // it is undefined in the module or another symbol has already been used.
+      Res.Prevailing = !Sym.isUndefined() && !PrevailingSymbols[Sym.getName()];
+
+      // LTO must preserve symbols that are referenced by other object files or
+      // that are needed by the rest of the toolchain.
+      Res.VisibleToRegularObj =
+          UsedInRegularObj[Sym.getName()] ||
+          isValidCIdentifier(Sym.getSectionName()) ||
+          (Res.Prevailing && Sym.getName().startswith("__omp"));
+
+      // We do not currently support shared libraries, so no symbols will be
+      // referenced externally by shared libraries.
+      Res.ExportDynamic = false;
+
+      // The result will currently always be an executable, so the only time the
+      // definition will not reside in this link unit is if it's undefined.
+      Res.FinalDefinitionInLinkageUnit = !Sym.isUndefined();
+
+      // We do not support linker redefined symbols (e.g. --wrap) for device
+      // image linking, so the symbols will not be changed after LTO.
+      Res.LinkerRedefined = false;
+
+      // Mark this symbol as the prevailing one.
+      PrevailingSymbols[Sym.getName()] |= Res.Prevailing;
+    }
+
+    // Add the bitcode file with its resolved symbols to the LTO job.
+    if (Error Err = LTOBackend->add(std::move(BitcodeFile), Resolutions))
+      return Err;
+  }
+
+  // Run the LTO job to compile the bitcode.
+  size_t MaxTasks = LTOBackend->getMaxTasks();
+  std::vector<SmallString<128>> Files(MaxTasks);
+  auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
+    int FD = -1;
+    auto &TempFile = Files[Task];
+    StringRef Extension = (TheTriple.isNVPTX()) ? "s" : "o";
+    if (std::error_code EC = sys::fs::createTemporaryFile(
+            "lto-" + TheTriple.getTriple(), Extension, FD, TempFile))
+      return nullptr;
+    TempFiles.push_back(static_cast<std::string>(TempFile));
+    return std::make_unique<CachedFileStream>(
+        std::make_unique<llvm::raw_fd_ostream>(FD, true));
+  };
+  if (Error Err = LTOBackend->run(AddStream))
+    return Err;
+  // NVPTX output is written as assembly, so run each file through the assembler.
+  for (auto &File : Files) {
+    if (!TheTriple.isNVPTX())
+      continue;
+
+    auto FileOrErr = nvptx::assemble(File, TheTriple, Arch);
+    if (!FileOrErr)
+      return FileOrErr.takeError();
+    File = *FileOrErr;
+  }
+
+  return static_cast<std::string>(Files.front());
+}
+
 /// Runs the appropriate linking action on all the device files specified in \p
 /// DeviceFiles. The linked device images are returned in \p LinkedImages.
 Error linkDeviceFiles(ArrayRef<DeviceFile> DeviceFiles,
@@ -499,6 +790,12 @@
     StringRef Arch(TargetFeatures.second);
 
     // TODO: Run LTO or bitcode linking before the final link job.
+    auto ObjectOrErr =
+        linkBitcodeFiles(LinkerInput.getValue(), TheTriple, Arch);
+    if (!ObjectOrErr)
+      return ObjectOrErr.takeError();
+    if ((*ObjectOrErr).hasValue())
+      LinkerInput.getValue() = {**ObjectOrErr};
 
     auto ImageOrErr =
         linkDevice(LinkerInput.getValue(), LinkerArgs, TheTriple, Arch);
@@ -525,7 +822,7 @@
   // Create a new file to write the wrapped bitcode file to.
   SmallString<128> BitcodeFile;
   if (std::error_code EC =
-          sys::fs::createTemporaryFile("offload", "bc", BitcodeFile))
+          sys::fs::createTemporaryFile("wrapper", "bc", BitcodeFile))
     return createFileError(BitcodeFile, EC);
   TempFiles.push_back(static_cast<std::string>(BitcodeFile));
 
@@ -554,7 +851,7 @@
   // Create a new file to write the wrapped bitcode file to.
   SmallString<128> ObjectFile;
   if (std::error_code EC =
-          sys::fs::createTemporaryFile("offload", "o", ObjectFile))
+          sys::fs::createTemporaryFile("image", "o", ObjectFile))
     return createFileError(BitcodeFile, EC);
   TempFiles.push_back(static_cast<std::string>(ObjectFile));
 
@@ -592,6 +889,8 @@
 Optional<std::string> searchLibraryBaseName(StringRef Name,
                                             ArrayRef<StringRef> SearchPaths) {
   for (StringRef Dir : SearchPaths) {
+    if (Optional<std::string> File = findFile(Dir, "lib" + Name + ".so"))
+      return None;
     if (Optional<std::string> File = findFile(Dir, "lib" + Name + ".a"))
       return File;
   }
@@ -614,6 +913,11 @@
 
 int main(int argc, const char **argv) {
   InitLLVM X(argc, argv);
+  InitializeAllTargetInfos();
+  InitializeAllTargets();
+  InitializeAllTargetMCs();
+  InitializeAllAsmParsers();
+  InitializeAllAsmPrinters();
 
   LinkerExecutable = argv[0];
   sys::PrintStackTraceOnErrorSignal(argv[0]);
Index: clang/tools/clang-linker-wrapper/CMakeLists.txt
===================================================================
--- clang/tools/clang-linker-wrapper/CMakeLists.txt
+++ clang/tools/clang-linker-wrapper/CMakeLists.txt
@@ -1,4 +1,15 @@
-set(LLVM_LINK_COMPONENTS BitWriter Core BinaryFormat IRReader Object Support)
+set(LLVM_LINK_COMPONENTS 
+  ${LLVM_TARGETS_TO_BUILD}
+  BitWriter
+  Core
+  BinaryFormat
+  MC
+  Passes
+  IRReader
+  Object
+  Support
+  CodeGen
+  LTO)
 
 if(NOT CLANG_BUILT_STANDALONE)
   set(tablegen_deps intrinsics_gen)
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4607,7 +4607,7 @@
     if (JA.getType() == types::TY_LLVM_BC)
       CmdArgs.push_back("-emit-llvm-uselists");
 
-    if (IsUsingLTO) {
+    if (IsUsingLTO && !Args.hasArg(options::OPT_fopenmp_new_driver)) {
       // Only AMDGPU supports device-side LTO.
       if (IsDeviceOffloadAction && !Triple.isAMDGPU()) {
         D.Diag(diag::err_drv_unsupported_opt_for_target)
@@ -8133,6 +8133,39 @@
                                  const char *LinkingOutput) const {
   ArgStringList CmdArgs;
 
+  if (getToolChain().getDriver().isUsingLTO(/* IsOffload */ true)) {
+    // Pass in target features for each toolchain.
+    auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+    for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
+         ++TI) {
+      const ToolChain *TC = TI->second;
+      const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
+      ArgStringList FeatureArgs;
+      TC->addClangTargetOptions(TCArgs, FeatureArgs, Action::OFK_OpenMP);
+      auto FeatureIt = llvm::find(FeatureArgs, "-target-feature");
+      CmdArgs.push_back(Args.MakeArgString(
+          "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1)));
+    }
+
+    // Pass in the optimization level to use for LTO.
+    if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+      StringRef OOpt;
+      if (A->getOption().matches(options::OPT_O4) ||
+          A->getOption().matches(options::OPT_Ofast))
+        OOpt = "3";
+      else if (A->getOption().matches(options::OPT_O)) {
+        OOpt = A->getValue();
+        if (OOpt == "g")
+          OOpt = "1";
+        else if (OOpt == "s" || OOpt == "z")
+          OOpt = "2";
+      } else if (A->getOption().matches(options::OPT_O0))
+        OOpt = "0";
+      if (!OOpt.empty())
+        CmdArgs.push_back(Args.MakeArgString(Twine("-opt-level=O") + OOpt));
+    }
+  }
+
   // Construct the link job so we can wrap around it.
   Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput);
   const auto &LinkCommand = C.getJobs().getJobs().back();
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -217,8 +217,7 @@
 }
 
 InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings,
-                                     bool IsClCompatMode,
-                                     bool &ContainsError) {
+                                     bool IsClCompatMode, bool &ContainsError) {
   llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
   ContainsError = false;
 
@@ -252,9 +251,9 @@
       unsigned DiagID;
       auto ArgString = A->getAsString(Args);
       std::string Nearest;
-      if (getOpts().findNearest(
-            ArgString, Nearest, IncludedFlagsBitmask,
-            ExcludedFlagsBitmask | options::Unsupported) > 1) {
+      if (getOpts().findNearest(ArgString, Nearest, IncludedFlagsBitmask,
+                                ExcludedFlagsBitmask | options::Unsupported) >
+          1) {
         DiagID = diag::err_drv_unsupported_opt;
         Diag(DiagID) << ArgString;
       } else {
@@ -279,8 +278,8 @@
     unsigned DiagID;
     auto ArgString = A->getAsString(Args);
     std::string Nearest;
-    if (getOpts().findNearest(
-          ArgString, Nearest, IncludedFlagsBitmask, ExcludedFlagsBitmask) > 1) {
+    if (getOpts().findNearest(ArgString, Nearest, IncludedFlagsBitmask,
+                              ExcludedFlagsBitmask) > 1) {
       DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl
                           : diag::err_drv_unknown_argument;
       Diags.Report(DiagID) << ArgString;
@@ -309,15 +308,14 @@
   if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
       (PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
       (PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
-      (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) ||
-      CCGenDiagnostics) {
+      (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) || CCGenDiagnostics) {
     FinalPhase = phases::Preprocess;
 
-  // --precompile only runs up to precompilation.
+    // --precompile only runs up to precompilation.
   } else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) {
     FinalPhase = phases::Precompile;
 
-  // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
+    // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
   } else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
              (PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) ||
              (PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) ||
@@ -329,18 +327,18 @@
              (PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
     FinalPhase = phases::Compile;
 
-  // -S only runs up to the backend.
+    // -S only runs up to the backend.
   } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
     FinalPhase = phases::Backend;
 
-  // -c compilation only runs up to the assembler.
+    // -c compilation only runs up to the assembler.
   } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
     FinalPhase = phases::Assemble;
 
   } else if ((PhaseArg = DAL.getLastArg(options::OPT_emit_interface_stubs))) {
     FinalPhase = phases::IfsMerge;
 
-  // Otherwise do everything.
+    // Otherwise do everything.
   } else
     FinalPhase = phases::Link;
 
@@ -455,8 +453,7 @@
 ///
 /// This routine provides the logic to compute a target triple from various
 /// args passed to the driver and the default triple string.
-static llvm::Triple computeTargetTriple(const Driver &D,
-                                        StringRef TargetTriple,
+static llvm::Triple computeTargetTriple(const Driver &D, StringRef TargetTriple,
                                         const ArgList &Args,
                                         StringRef DarwinArchName = "") {
   // FIXME: Already done in Compilation *Driver::BuildCompilation
@@ -567,8 +564,8 @@
   // Handle -miamcu flag.
   if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
     if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
-      D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
-                                                       << Target.str();
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << "-miamcu" << Target.str();
 
     if (A && !A->getOption().matches(options::OPT_m32))
       D.Diag(diag::err_drv_argument_not_allowed_with)
@@ -973,8 +970,8 @@
     // Get architecture name from config file name like 'i386.cfg' or
     // 'armv7l-clang.cfg'.
     // Check if command line options changes effective triple.
-    llvm::Triple EffectiveTriple = computeTargetTriple(*this,
-                                             CfgTriple.getTriple(), *CLOptions);
+    llvm::Triple EffectiveTriple =
+        computeTargetTriple(*this, CfgTriple.getTriple(), *CLOptions);
     if (CfgTriple.getArch() != EffectiveTriple.getArch()) {
       FixedConfigFile = EffectiveTriple.getArchName();
       FixedArchPrefixLen = FixedConfigFile.size();
@@ -1052,8 +1049,8 @@
   bool HasConfigFile = !ContainsError && (CfgOptions.get() != nullptr);
 
   // All arguments, from both config file and command line.
-  InputArgList Args = std::move(HasConfigFile ? std::move(*CfgOptions)
-                                              : std::move(*CLOptions));
+  InputArgList Args =
+      std::move(HasConfigFile ? std::move(*CfgOptions) : std::move(*CLOptions));
 
   // The args for config files or /clang: flags belong to different InputArgList
   // objects than Args. This copies an Arg from one of those other InputArgLists
@@ -1195,14 +1192,13 @@
   if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
     StringRef Name = A->getValue();
     unsigned Model = llvm::StringSwitch<unsigned>(Name)
-        .Case("off", EmbedNone)
-        .Case("all", EmbedBitcode)
-        .Case("bitcode", EmbedBitcode)
-        .Case("marker", EmbedMarker)
-        .Default(~0U);
+                         .Case("off", EmbedNone)
+                         .Case("all", EmbedBitcode)
+                         .Case("bitcode", EmbedBitcode)
+                         .Case("marker", EmbedMarker)
+                         .Default(~0U);
     if (Model == ~0U) {
-      Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
-                                                << Name;
+      Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
     } else
       BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
   }
@@ -1214,8 +1210,8 @@
   DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
 
   // Owned by the host.
-  const ToolChain &TC = getToolChain(
-      *UArgs, computeTargetTriple(*this, TargetTriple, *UArgs));
+  const ToolChain &TC =
+      getToolChain(*UArgs, computeTargetTriple(*this, TargetTriple, *UArgs));
 
   // The compilation takes ownership of Args.
   Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs,
@@ -1315,7 +1311,7 @@
     size_t LineEnd = Data.find_first_of("\n", ParentProcPos);
     if (LineEnd == StringRef::npos)
       continue;
-    StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim();
+    StringRef ParentProcess = Data.slice(ParentProcPos + 15, LineEnd).trim();
     int OpenBracket = -1, CloseBracket = -1;
     for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) {
       if (ParentProcess[i] == '[')
@@ -1328,7 +1324,8 @@
     int CrashPID;
     if (OpenBracket < 0 || CloseBracket < 0 ||
         ParentProcess.slice(OpenBracket + 1, CloseBracket)
-            .getAsInteger(10, CrashPID) || CrashPID != PID) {
+            .getAsInteger(10, CrashPID) ||
+        CrashPID != PID) {
       continue;
     }
 
@@ -1531,8 +1528,7 @@
       CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash";
       Diag(clang::diag::note_drv_command_failed_diag_msg)
           << "Crash backtrace is located in";
-      Diag(clang::diag::note_drv_command_failed_diag_msg)
-          << CrashDiagDir.str();
+      Diag(clang::diag::note_drv_command_failed_diag_msg) << CrashDiagDir.str();
       Diag(clang::diag::note_drv_command_failed_diag_msg)
           << "(choose the .crash file that corresponds to your crash)";
     }
@@ -1817,11 +1813,11 @@
 
   if (C.getArgs().hasArg(options::OPT_v)) {
     if (!SystemConfigDir.empty())
-      llvm::errs() << "System configuration file directory: "
-                   << SystemConfigDir << "\n";
+      llvm::errs() << "System configuration file directory: " << SystemConfigDir
+                   << "\n";
     if (!UserConfigDir.empty())
-      llvm::errs() << "User configuration file directory: "
-                   << UserConfigDir << "\n";
+      llvm::errs() << "User configuration file directory: " << UserConfigDir
+                   << "\n";
   }
 
   const ToolChain &TC = C.getDefaultToolChain();
@@ -1895,7 +1891,7 @@
     StringRef ProgName = A->getValue();
 
     // Null program name cannot have a path.
-    if (! ProgName.empty())
+    if (!ProgName.empty())
       llvm::outs() << GetProgramPath(ProgName, TC);
 
     llvm::outs() << "\n";
@@ -2114,7 +2110,7 @@
 
   // Add in arch bindings for every top level action, as well as lipo and
   // dsymutil steps if needed.
-  for (Action* Act : SingleActions) {
+  for (Action *Act : SingleActions) {
     // Make sure we can lipo this kind of output. If not (and it is an actual
     // output) then we disallow, since we can't create an output file with the
     // right name without overwriting it. We could remove this oddity by just
@@ -2157,7 +2153,7 @@
 
       // Verify the debug info output.
       if (Args.hasArg(options::OPT_verify_debug_info)) {
-        Action* LastAction = Actions.back();
+        Action *LastAction = Actions.back();
         Actions.pop_back();
         Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>(
             LastAction, types::TY_Nothing));
@@ -2262,7 +2258,7 @@
          Args.filtered(options::OPT__SLASH_TC, options::OPT__SLASH_TP)) {
       if (Previous) {
         Diag(clang::diag::warn_drv_overriding_flag_option)
-          << Previous->getSpelling() << A->getSpelling();
+            << Previous->getSpelling() << A->getSpelling();
         ShowNote = true;
       }
       Previous = A;
@@ -2312,7 +2308,8 @@
             Ty = TC.LookupTypeForExtension(Ext + 1);
 
           if (Ty == types::TY_INVALID) {
-            if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics))
+            if (IsCLMode() &&
+                (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics))
               Ty = types::TY_CXX;
             else if (CCCIsCPP() || CCGenDiagnostics)
               Ty = types::TY_C;
@@ -2494,7 +2491,7 @@
     virtual void appendLinkDeviceActions(ActionList &AL) {}
 
     /// Append linker host action generated by the builder.
-    virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; }
+    virtual Action *appendLinkHostActions(ActionList &AL) { return nullptr; }
 
     /// Append linker actions generated by the builder.
     virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
@@ -2718,7 +2715,7 @@
         return false;
 
       Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
-          options::OPT_fno_gpu_rdc, /*Default=*/false);
+                                 options::OPT_fno_gpu_rdc, /*Default=*/false);
 
       const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
       assert(HostTC && "No toolchain for host compilation.");
@@ -2740,12 +2737,12 @@
       Arg *PartialCompilationArg = Args.getLastArg(
           options::OPT_cuda_host_only, options::OPT_cuda_device_only,
           options::OPT_cuda_compile_host_device);
-      CompileHostOnly = PartialCompilationArg &&
-                        PartialCompilationArg->getOption().matches(
-                            options::OPT_cuda_host_only);
-      CompileDeviceOnly = PartialCompilationArg &&
-                          PartialCompilationArg->getOption().matches(
-                              options::OPT_cuda_device_only);
+      CompileHostOnly =
+          PartialCompilationArg && PartialCompilationArg->getOption().matches(
+                                       options::OPT_cuda_host_only);
+      CompileDeviceOnly =
+          PartialCompilationArg && PartialCompilationArg->getOption().matches(
+                                       options::OPT_cuda_device_only);
       EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
       EmitAsm = Args.getLastArg(options::OPT_S);
       FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ);
@@ -3152,10 +3149,10 @@
         // Linking all inputs for the current GPU arch.
         // LI contains all the inputs for the linker.
         OffloadAction::DeviceDependences DeviceLinkDeps;
-        DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
-            GpuArchList[I], AssociatedOffloadKind);
+        DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I],
+                           AssociatedOffloadKind);
         AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
-            DeviceLinkAction->getType()));
+                                                 DeviceLinkAction->getType()));
         ++I;
       }
       DeviceLinkerInputs.clear();
@@ -3165,14 +3162,15 @@
       OffloadAction::DeviceDependences DDeps;
       auto *TopDeviceLinkAction =
           C.MakeAction<LinkJobAction>(AL, types::TY_Object);
-      DDeps.add(*TopDeviceLinkAction, *ToolChains[0],
-          nullptr, AssociatedOffloadKind);
+      DDeps.add(*TopDeviceLinkAction, *ToolChains[0], nullptr,
+                AssociatedOffloadKind);
 
       // Offload the host object to the host linker.
-      AL.push_back(C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
+      AL.push_back(
+          C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
     }
 
-    Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); }
+    Action *appendLinkHostActions(ActionList &AL) override { return AL.back(); }
 
     void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
   };
@@ -3313,17 +3311,17 @@
             C.MakeAction<LinkJobAction>(LI, types::TY_Image);
         OffloadAction::DeviceDependences DeviceLinkDeps;
         DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
-		        Action::OFK_OpenMP);
+                           Action::OFK_OpenMP);
         AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
-            DeviceLinkAction->getType()));
+                                                 DeviceLinkAction->getType()));
         ++TC;
       }
       DeviceLinkerInputs.clear();
     }
 
-    Action* appendLinkHostActions(ActionList &AL) override {
-      // Create wrapper bitcode from the result of device link actions and compile
-      // it to an object which will be added to the host link command.
+    Action *appendLinkHostActions(ActionList &AL) override {
+      // Create wrapper bitcode from the result of device link actions and
+      // compile it to an object which will be added to the host link command.
       auto *BC = C.MakeAction<OffloadWrapperJobAction>(AL, types::TY_LLVM_BC);
       auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
       return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
@@ -3556,7 +3554,7 @@
     return false;
   }
 
-  Action* makeHostLinkAction() {
+  Action *makeHostLinkAction() {
     // Build a list of device linking actions.
     ActionList DeviceAL;
     for (DeviceActionBuilder *SB : SpecializedBuilders) {
@@ -3569,7 +3567,7 @@
       return nullptr;
 
     // Let builders add host linking actions.
-    Action* HA = nullptr;
+    Action *HA = nullptr;
     for (DeviceActionBuilder *SB : SpecializedBuilders) {
       if (!SB->isValid())
         continue;
@@ -4046,7 +4044,7 @@
 
     auto TC = ToolChains.begin();
     for (Action *&A : DeviceActions) {
-      A = ConstructPhaseAction(C, Args, Phase, A);
+      A = ConstructPhaseAction(C, Args, Phase, A, Action::OFK_OpenMP);
 
       if (isa<CompileJobAction>(A)) {
         HostAction->setCannotBeCollapsedWithNextDependentAction();
@@ -4163,6 +4161,12 @@
           Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
     }
+    if (isUsingLTO(/*IsOffload=*/true) &&
+        TargetDeviceOffloadKind == Action::OFK_OpenMP) {
+      types::ID Output =
+          Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
+      return C.MakeAction<BackendJobAction>(Input, Output);
+    }
     if (Args.hasArg(options::OPT_emit_llvm) ||
         (TargetDeviceOffloadKind == Action::OFK_HIP &&
          Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
@@ -4603,8 +4607,8 @@
         continue;
       }
 
-      // This is legal to combine. Append any offload action we found and add the
-      // current input to preprocessor inputs.
+      // This is legal to combine. Append any offload action we found and add
+      // the current input to preprocessor inputs.
       CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(),
                                     PreprocessJobOffloadActions.end());
       NewInputs.append(PJ->input_begin(), PJ->input_end());
@@ -4627,8 +4631,7 @@
   /// connected to collapsed actions are updated accordingly. The latter enables
   /// the caller of the selector to process them afterwards instead of just
   /// dropping them. If no suitable tool is found, null will be returned.
-  const Tool *getTool(ActionList &Inputs,
-                      ActionList &CollapsedOffloadAction) {
+  const Tool *getTool(ActionList &Inputs, ActionList &CollapsedOffloadAction) {
     //
     // Get the largest chain of actions that we could combine.
     //
@@ -4671,7 +4674,7 @@
     return T;
   }
 };
-}
+} // namespace
 
 /// Return a string that uniquely identifies the result of a job. The bound arch
 /// is not necessarily represented in the toolchain's triple -- for example,
@@ -4809,9 +4812,9 @@
     StringRef ArchName = BAA->getArchName();
 
     if (!ArchName.empty())
-      TC = &getToolChain(C.getArgs(),
-                         computeTargetTriple(*this, TargetTriple,
-                                             C.getArgs(), ArchName));
+      TC = &getToolChain(
+          C.getArgs(),
+          computeTargetTriple(*this, TargetTriple, C.getArgs(), ArchName));
     else
       TC = &C.getDefaultToolChain();
 
@@ -4820,7 +4823,6 @@
                               TargetDeviceOffloadKind);
   }
 
-
   ActionList Inputs = A->getInputs();
 
   const JobAction *JA = cast<JobAction>(A);
@@ -4984,10 +4986,11 @@
       BaseInput =
           C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper");
     }
-    Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
-                                             AtTopLevel, MultipleArchs,
-                                             OffloadingPrefix),
-                       BaseInput);
+    Result =
+        InputInfo(A,
+                  GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel,
+                                     MultipleArchs, OffloadingPrefix),
+                  BaseInput);
   }
 
   if (CCCPrintBindings && !CCGenDiagnostics) {
@@ -5497,15 +5500,15 @@
     case llvm::Triple::Linux:
     case llvm::Triple::ELFIAMCU:
       if (Target.getArch() == llvm::Triple::hexagon)
-        TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
-                                                             Args);
+        TC =
+            std::make_unique<toolchains::HexagonToolChain>(*this, Target, Args);
       else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
                !Target.hasEnvironment())
         TC = std::make_unique<toolchains::MipsLLVMToolChain>(*this, Target,
-                                                              Args);
+                                                             Args);
       else if (Target.isPPC())
         TC = std::make_unique<toolchains::PPCLinuxToolChain>(*this, Target,
-                                                              Args);
+                                                             Args);
       else if (Target.getArch() == llvm::Triple::ve)
         TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args);
 
@@ -5543,7 +5546,7 @@
         break;
       case llvm::Triple::Itanium:
         TC = std::make_unique<toolchains::CrossWindowsToolChain>(*this, Target,
-                                                                  Args);
+                                                                 Args);
         break;
       case llvm::Triple::MSVC:
       case llvm::Triple::UnknownEnvironment:
@@ -5552,8 +5555,7 @@
           TC = std::make_unique<toolchains::CrossWindowsToolChain>(
               *this, Target, Args);
         else
-          TC =
-              std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
+          TC = std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
         break;
       }
       break;
@@ -5580,8 +5582,8 @@
         TC = std::make_unique<toolchains::TCELEToolChain>(*this, Target, Args);
         break;
       case llvm::Triple::hexagon:
-        TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
-                                                             Args);
+        TC =
+            std::make_unique<toolchains::HexagonToolChain>(*this, Target, Args);
         break;
       case llvm::Triple::lanai:
         TC = std::make_unique<toolchains::LanaiToolChain>(*this, Target, Args);
@@ -5597,8 +5599,7 @@
         TC = std::make_unique<toolchains::AVRToolChain>(*this, Target, Args);
         break;
       case llvm::Triple::msp430:
-        TC =
-            std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args);
+        TC = std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args);
         break;
       case llvm::Triple::riscv32:
       case llvm::Triple::riscv64:
@@ -5618,7 +5619,7 @@
       default:
         if (Target.getVendor() == llvm::Triple::Myriad)
           TC = std::make_unique<toolchains::MyriadToolChain>(*this, Target,
-                                                              Args);
+                                                             Args);
         else if (toolchains::BareMetal::handlesTarget(Target))
           TC = std::make_unique<toolchains::BareMetal>(*this, Target, Args);
         else if (Target.isOSBinFormatELF())
@@ -5687,12 +5688,12 @@
 
 bool Driver::ShouldUseFlangCompiler(const JobAction &JA) const {
   // Say "no" if there is not exactly one input of a type flang understands.
-  if (JA.size() != 1 ||
-      !types::isFortran((*JA.input_begin())->getType()))
+  if (JA.size() != 1 || !types::isFortran((*JA.input_begin())->getType()))
     return false;
 
   // And say "no" if this is not a kind of action flang understands.
-  if (!isa<PreprocessJobAction>(JA) && !isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
+  if (!isa<PreprocessJobAction>(JA) && !isa<CompileJobAction>(JA) &&
+      !isa<BackendJobAction>(JA))
     return false;
 
   return true;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to