sfantao updated this revision to Diff 36816.
sfantao added a comment.

Make the offloading ELF sections consistent with what is in 
http://reviews.llvm.org/D12614.

Fix bug in AtTopLevel flag, so that the bundling job is always considered a
top-level job.

Fix several typos.


http://reviews.llvm.org/D9888

Files:
  include/clang/Basic/DiagnosticDriverKinds.td
  include/clang/Driver/Action.h
  include/clang/Driver/CC1Options.td
  include/clang/Driver/Driver.h
  include/clang/Driver/Options.td
  include/clang/Driver/ToolChain.h
  include/clang/Driver/Types.h
  lib/Driver/Action.cpp
  lib/Driver/Compilation.cpp
  lib/Driver/Driver.cpp
  lib/Driver/ToolChain.cpp
  lib/Driver/ToolChains.cpp
  lib/Driver/ToolChains.h
  lib/Driver/Tools.cpp
  lib/Driver/Tools.h
  lib/Driver/Types.cpp
  test/OpenMP/target_driver.c
  tools/CMakeLists.txt
  tools/Makefile
  tools/clang-offload-bundler/CMakeLists.txt
  tools/clang-offload-bundler/ClangOffloadBundler.cpp
  tools/clang-offload-bundler/Makefile

Index: tools/clang-offload-bundler/Makefile
===================================================================
--- /dev/null
+++ tools/clang-offload-bundler/Makefile
@@ -0,0 +1,21 @@
+##===- clang-offload-bundler/Makefile ----------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+CLANG_LEVEL := ../..
+
+TOOLNAME = clang-offload-bundler
+
+# No plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+include $(CLANG_LEVEL)/../../Makefile.config
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) support option
+USEDLIBS = clangBasic.a
+
+include $(CLANG_LEVEL)/Makefile
Index: tools/clang-offload-bundler/ClangOffloadBundler.cpp
===================================================================
--- /dev/null
+++ tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -0,0 +1,548 @@
+//===-- clang-offload-bundler/ClangOffloadBundler.cpp - Offload bundler ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a clang-offload-bundler that bundles different
+/// files that relate to the same source code but different targets into a
+/// single one. It also implements the opposite functionality, i.e. unbundling
+/// files previously created by this tool.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/Version.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Signals.h"
+
+using namespace llvm;
+
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Mark all our options with this category; main() hides everything else
+// (except for -version and -help) by calling cl::HideUnrelatedOptions.
+static cl::OptionCategory
+    ClangOffloadBundlerCategory("clang-offload-bundler options");
+
+static cl::list<std::string>
+    InputFileNames("inputs", cl::CommaSeparated, cl::OneOrMore,
+                   cl::desc("[<input file>,...]"),
+                   cl::cat(ClangOffloadBundlerCategory));
+static cl::list<std::string>
+    OutputFileNames("outputs", cl::CommaSeparated, cl::OneOrMore,
+                    cl::desc("[<output file>,...]"),
+                    cl::cat(ClangOffloadBundlerCategory));
+static cl::list<std::string> TargetNames("targets", cl::CommaSeparated,
+                                         cl::OneOrMore,
+                                         cl::desc("[<target triple>,...]"),
+                                         cl::cat(ClangOffloadBundlerCategory));
+static cl::opt<std::string>
+    FilesType("type", cl::Required,
+              cl::desc("Type of the files to be bundled/unbundled.\n"
+                       "Current supported types are:\n"
+                       "  i   - cpp-output\n"
+                       "  ii  - c++-cpp-output\n"
+                       "  ll  - llvm\n"
+                       "  bc  - llvm-bc\n"
+                       "  s   - assembler\n"
+                       "  o   - object\n"
+                       "  gch - precompiled-header"),
+              cl::cat(ClangOffloadBundlerCategory));
+static cl::opt<bool>
+    Unbundle("unbundle",
+             cl::desc("Unbundle bundled file into several output files.\n"),
+             cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+
+/// \brief Magic string that marks the existence of offloading data.
+#define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
+
+/// \brief Generic file handler interface.
+class FileHandler {
+protected:
+  /// \brief Update the file handler with information from the header of the
+  /// bundled file
+  virtual void ReadHeader(MemoryBuffer &Input) = 0;
+  /// \brief Read the marker of the next bundle to be read in the file. The
+  /// triple of the target associated with that bundle is returned. An empty
+  /// string is returned if there are no more bundles to be read.
+  virtual StringRef ReadBundleStart(MemoryBuffer &Input) = 0;
+  /// \brief Read the marker that closes the current bundle.
+  virtual void ReadBundleEnd(MemoryBuffer &Input) = 0;
+  /// \brief Read the current bundle and write the result into the stream \a OS.
+  virtual void ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
+
+  /// \brief Write the header of the bundled file to \a OS based on the
+  /// information gathered from \a Inputs.
+  virtual void WriteHeader(raw_fd_ostream &OS,
+                           ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
+  /// \brief Write the marker that initiates a bundle for the triple \a
+  /// TargetTriple to \a OS.
+  virtual void WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) = 0;
+  /// \brief Write the marker that closes a bundle for the triple \a
+  /// TargetTriple to \a OS.
+  virtual void WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0;
+  /// \brief Write the bundle from \a Input into \a OS.
+  virtual void WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
+
+public:
+  FileHandler() {}
+  virtual ~FileHandler() {}
+
+  /// \brief Bundle the files. Return true if an error was found.
+  bool Bundle() {
+    std::error_code EC;
+
+    // Create output file.
+    raw_fd_ostream OutputFile(OutputFileNames.front(), EC, sys::fs::F_None);
+
+    if (EC) {
+      llvm::errs() << "error: Can't open file " << OutputFileNames.front()
+                   << ".\n";
+      return true;
+    }
+
+    // Open input files.
+    std::vector<std::unique_ptr<MemoryBuffer>> InputBuffers(
+        InputFileNames.size());
+
+    unsigned Idx = 0;
+    for (const auto &I : InputFileNames) {
+      ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
+          MemoryBuffer::getFileOrSTDIN(I);
+      if (std::error_code EC = CodeOrErr.getError()) {
+        llvm::errs() << EC.message() << "\n";
+        return true;
+      }
+      InputBuffers[Idx++] = std::move(CodeOrErr.get());
+    }
+
+    // Write header.
+    WriteHeader(OutputFile, InputBuffers);
+
+    // Write all bundles along with the start/end markers.
+    auto Input = InputBuffers.begin();
+    for (auto Triple = TargetNames.begin(); Triple < TargetNames.end();
+         ++Triple, ++Input) {
+      WriteBundleStart(OutputFile, *Triple);
+      WriteBundle(OutputFile, *Input->get());
+      WriteBundleEnd(OutputFile, *Triple);
+    }
+    return false;
+  }
+
+  /// \brief Unbundle the files. Return true if an error was found.
+  bool Unbundle() {
+    // Open Input file.
+    ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
+        MemoryBuffer::getFileOrSTDIN(InputFileNames.front());
+    if (std::error_code EC = CodeOrErr.getError()) {
+      llvm::errs() << EC.message() << "\n";
+      return true;
+    }
+
+    // Read the header of the bundled file.
+    MemoryBuffer &Input = *CodeOrErr.get();
+    ReadHeader(Input);
+
+    // Create a work list that consists of the map triple/output file.
+    StringMap<StringRef> Worklist;
+    auto Output = OutputFileNames.begin();
+    for (auto Triple = TargetNames.begin(); Triple < TargetNames.end();
+         ++Triple, ++Output)
+      Worklist[*Triple] = *Output;
+
+    // Read all the bundles that are in the work list, and return an error if a
+    // given bundle wasn't found.
+    while (!Worklist.empty()) {
+      StringRef CurTriple = ReadBundleStart(Input);
+
+      if (CurTriple.empty()) {
+        llvm::errs()
+            << "error: Unable to find bundles for all requested targets.\n";
+        return true;
+      }
+
+      auto Entry = Worklist.find(CurTriple);
+      // The file may have more bundles for other targets.
+      if (Entry == Worklist.end())
+        continue;
+
+      // Check if the output file can be opened and copy the bundle to it.
+      std::error_code EC;
+      raw_fd_ostream OutputFile(Entry->second, EC, sys::fs::F_None);
+      if (EC) {
+        llvm::errs() << "error: Can't open file " << Entry->second << ".\n";
+        return true;
+      }
+      ReadBundle(OutputFile, Input);
+      ReadBundleEnd(Input);
+      // Erase (not remove) so that the map entry's storage is released too.
+      Worklist.erase(Entry);
+    }
+
+    return false;
+  }
+};
+
+// Handler for binary files. The bundled file will have the following format
+// (all integers are stored in little-endian format):
+//
+// "OFFLOAD_BUNDLER_MAGIC_STR" (ASCII encoding of the string)
+//
+// NumberOfOffloadBundles (8-byte integer)
+//
+// OffsetOfBundle1 (8-byte integer)
+// SizeOfBundle1 (8-byte integer)
+// NumberOfBytesInTripleOfBundle1 (8-byte integer)
+// TripleOfBundle1 (byte length defined before)
+//
+// ...
+//
+// OffsetOfBundleN (8-byte integer)
+// SizeOfBundleN (8-byte integer)
+// NumberOfBytesInTripleOfBundleN (8-byte integer)
+// TripleOfBundleN (byte length defined before)
+//
+// Bundle1
+// ...
+// BundleN
+
+/// \brief Read an 8-byte little-endian integer from \a Buffer at offset \a pos.
+static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
+  const char *Bytes = Buffer.data() + pos;
+  uint64_t Value = 0;
+
+  // Fold from the most significant byte (stored last) down to the least.
+  for (unsigned Idx = 8; Idx-- > 0;) {
+    const uint64_t Byte = 0xffu & static_cast<uint64_t>(Bytes[Idx]);
+    Value = (Value << 8) | Byte;
+  }
+  return Value;
+}
+
+/// \brief Write \a Val to \a OS as an 8-byte integer in little-endian
+/// format.
+static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) {
+  // Emit the least significant byte first, one byte per iteration.
+  for (unsigned Shift = 0; Shift != 64; Shift += 8) {
+    // Isolate the byte that sits Shift bits above the bottom of Val.
+    const char Byte = static_cast<char>((Val >> Shift) & 0xffu);
+    OS.write(&Byte, 1);
+  }
+}
+
+class BinaryFileHandler : public FileHandler {
+  /// \brief Information about the bundles extracted from the header.
+  struct BundleInfo {
+    /// \brief Size of the bundle.
+    uint64_t Size;
+    /// \brief Offset at which the bundle starts in the bundled file.
+    uint64_t Offset;
+    BundleInfo() : Size(0), Offset(0) {}
+    BundleInfo(uint64_t Size, uint64_t Offset) : Size(Size), Offset(Offset) {}
+  };
+  /// Map between a triple and the corresponding bundle information.
+  StringMap<BundleInfo> BundlesInfo;
+
+  /// Number of triples read so far.
+  size_t ReadTriples;
+
+protected:
+  /// \brief Parse the bundle table. Parsing stops silently at the first
+  /// truncated or inconsistent field; entries read before it are kept.
+  void ReadHeader(MemoryBuffer &Input) override {
+    StringRef FC = Input.getBuffer();
+
+    // Check if buffer is smaller than magic string.
+    size_t ReadChars = sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1;
+    if (ReadChars > FC.size())
+      return;
+
+    // Check if no magic was found.
+    StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
+    if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR))
+      return;
+
+    // Read number of bundles.
+    if (ReadChars + 8 > FC.size())
+      return;
+
+    uint64_t NumberOfBundles = Read8byteIntegerFromBuffer(FC, ReadChars);
+    ReadChars += 8;
+
+    // Read bundle offsets, sizes and triples.
+    for (uint64_t i = 0; i < NumberOfBundles; ++i) {
+      // Read offset.
+      if (ReadChars + 8 > FC.size())
+        return;
+
+      uint64_t Offset = Read8byteIntegerFromBuffer(FC, ReadChars);
+      ReadChars += 8;
+
+      // Read size.
+      if (ReadChars + 8 > FC.size())
+        return;
+
+      uint64_t Size = Read8byteIntegerFromBuffer(FC, ReadChars);
+      ReadChars += 8;
+
+      // Read triple size.
+      if (ReadChars + 8 > FC.size())
+        return;
+
+      uint64_t TripleSize = Read8byteIntegerFromBuffer(FC, ReadChars);
+      ReadChars += 8;
+
+      // Read triple. Compare against the bytes that remain rather than adding,
+      // so that a huge TripleSize cannot wrap around and pass the check.
+      if (TripleSize > FC.size() - ReadChars)
+        return;
+
+      StringRef Triple(&FC.data()[ReadChars], TripleSize);
+      ReadChars += TripleSize;
+
+      // Check if the offset and size make sense, again without overflowing.
+      if (!Size || !Offset || Offset > FC.size() || Size > FC.size() - Offset)
+        return;
+
+      assert(BundlesInfo.find(Triple) == BundlesInfo.end() &&
+             "Triple is duplicated??");
+      BundlesInfo[Triple] = BundleInfo(Size, Offset);
+    }
+  }
+  StringRef ReadBundleStart(MemoryBuffer &Input) override {
+    StringRef CurTriple = TargetNames[ReadTriples];
+    return CurTriple;
+  }
+  void ReadBundleEnd(MemoryBuffer &Input) override {
+    ++ReadTriples;
+  }
+  void ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) override {
+    StringRef FC = Input.getBuffer();
+    StringRef CurTriple = TargetNames[ReadTriples];
+
+    auto BI = BundlesInfo.lookup(CurTriple);
+    assert(BI.Size && "No bundle info found!");
+
+    OS.write(&FC.data()[BI.Offset], BI.Size);
+  }
+
+  void WriteHeader(raw_fd_ostream &OS,
+                   ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) override {
+    // Compute size of the header.
+    uint64_t HeaderSize = 0;
+
+    HeaderSize += sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1;
+    HeaderSize += 8; // Number of Bundles
+
+    for (auto &T : TargetNames) {
+      HeaderSize += 3 * 8; // Bundle offset, Size of bundle and size of triple.
+      HeaderSize += T.size(); // The triple.
+    }
+
+    // Write to the buffer the header.
+    OS << OFFLOAD_BUNDLER_MAGIC_STR;
+
+    Write8byteIntegerToBuffer(OS, TargetNames.size());
+
+    unsigned Idx = 0;
+    for (auto &T : TargetNames) {
+      MemoryBuffer &MB = *Inputs[Idx++].get();
+      // Bundle offset.
+      Write8byteIntegerToBuffer(OS, HeaderSize);
+      // Size of the bundle (adds to the next bundle's offset)
+      Write8byteIntegerToBuffer(OS, MB.getBufferSize());
+      HeaderSize += MB.getBufferSize();
+      // Size of the triple
+      Write8byteIntegerToBuffer(OS, T.size());
+      // Triple
+      OS << T;
+    }
+  }
+  void WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) override {}
+  void WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) override {}
+  void WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) override {
+    OS.write(Input.getBufferStart(), Input.getBufferSize());
+  }
+
+public:
+  BinaryFileHandler() : FileHandler(), ReadTriples(0) {}
+  ~BinaryFileHandler() override {}
+};
+
+// Handler for text files. The bundled file will have the following format.
+//
+// "Comment OFFLOAD_BUNDLER_MAGIC_STR__START__ triple"
+// Bundle 1
+// "Comment OFFLOAD_BUNDLER_MAGIC_STR__END__ triple"
+// ...
+// "Comment OFFLOAD_BUNDLER_MAGIC_STR__START__ triple"
+// Bundle N
+// "Comment OFFLOAD_BUNDLER_MAGIC_STR__END__ triple"
+class TextFileHandler : public FileHandler {
+  /// \brief String that begins a line comment.
+  StringRef Comment;
+
+  /// \brief String that initiates a bundle.
+  std::string BundleStartString;
+
+  /// \brief String that closes a bundle.
+  std::string BundleEndString;
+
+  /// \brief Number of chars read from input.
+  size_t ReadChars;
+
+protected:
+  void ReadHeader(MemoryBuffer &Input) override {}
+  StringRef ReadBundleStart(MemoryBuffer &Input) override {
+    StringRef FC = Input.getBuffer();
+
+    // Find start of the bundle.
+    ReadChars = FC.find(BundleStartString, ReadChars);
+    if (ReadChars == FC.npos)
+      return StringRef();
+
+    // Get position of the triple.
+    size_t TripleStart = ReadChars = ReadChars + BundleStartString.size();
+
+    // Get position that closes the triple.
+    size_t TripleEnd = ReadChars = FC.find("\n", ReadChars);
+    if (TripleEnd == FC.npos)
+      return StringRef();
+
+    // Next time we read after the new line.
+    ++ReadChars;
+
+    return StringRef(&FC.data()[TripleStart], TripleEnd - TripleStart);
+  }
+  void ReadBundleEnd(MemoryBuffer &Input) override {
+    StringRef FC = Input.getBuffer();
+
+    // A missing end marker (see ReadBundle) leaves no marker line to skip.
+    if (ReadChars == FC.npos)
+      return;
+
+    // Read up to the next new line.
+    assert(FC[ReadChars] == '\n' && "The bundle should end with a new line.");
+    size_t TripleEnd = ReadChars = FC.find("\n", ReadChars + 1);
+    if (TripleEnd == FC.npos)
+      return;
+
+    // Next time we read after the new line.
+    ++ReadChars;
+  }
+  void ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) override {
+    StringRef FC = Input.getBuffer();
+    size_t BundleStart = ReadChars;
+
+    // Find end of the bundle. If the end marker is missing (truncated file),
+    // emit the rest of the buffer instead of computing a wrapped-around length.
+    size_t BundleEnd = ReadChars = FC.find(BundleEndString, ReadChars);
+    if (BundleEnd == FC.npos)
+      BundleEnd = FC.size();
+
+    StringRef Bundle(&FC.data()[BundleStart], BundleEnd - BundleStart);
+    OS << Bundle;
+  }
+
+  void WriteHeader(raw_fd_ostream &OS,
+                   ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) override {}
+  void WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) override {
+    OS << BundleStartString << TargetTriple << "\n";
+  }
+  void WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) override {
+    OS << BundleEndString << TargetTriple << "\n";
+  }
+  void WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) override {
+    OS << Input.getBuffer();
+  }
+
+public:
+  TextFileHandler(StringRef Comment)
+      : FileHandler(), Comment(Comment), ReadChars(0) {
+    BundleStartString =
+        "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__START__ ";
+    BundleEndString =
+        "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__END__ ";
+  }
+};
+
+static void PrintVersion() {
+  // Report the tool's full version string on standard output.
+  outs() << clang::getClangToolFullVersion("clang-offload-bundler") << '\n';
+}
+
+int main(int argc, const char **argv) {
+  llvm::sys::PrintStackTraceOnErrorSignal();
+
+  cl::HideUnrelatedOptions(ClangOffloadBundlerCategory);
+  cl::SetVersionPrinter(PrintVersion);
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "A tool to bundle several input files of the specified type <type> \n"
+      "referring to the same source file but different targets into a single \n"
+      "one. The resulting file can also be unbundled into different files by \n"
+      "this tool if -unbundle is provided.\n");
+
+  if (Help)
+    cl::PrintHelpMessage();
+
+  // Check that the number of inputs/outputs/targets fits the selected mode.
+  bool Error = false;
+  if (Unbundle) {
+    if (InputFileNames.size() != 1) {
+      Error = true;
+      llvm::errs()
+          << "error: only one input file supported in unbundling mode.\n";
+    }
+    if (OutputFileNames.size() != TargetNames.size()) {
+      Error = true;
+      llvm::errs() << "error: number of output files and targets should match "
+                      "in unbundling mode.\n";
+    }
+  } else {
+    if (OutputFileNames.size() != 1) {
+      Error = true;
+      llvm::errs()
+          << "error: only one output file supported in bundling mode.\n";
+    }
+    if (InputFileNames.size() != TargetNames.size()) {
+      Error = true;
+      llvm::errs() << "error: number of input files and targets should match "
+                      "in bundling mode.\n";
+    }
+  }
+
+  // Instantiate only the handler for the requested type. Passing a 'new'
+  // expression to every StringSwitch case would allocate all of them and leak
+  // the ones that are not selected.
+  const StringRef Type(FilesType);
+  std::unique_ptr<FileHandler> FH;
+  if (Type == "i" || Type == "ii")
+    FH.reset(new TextFileHandler(/*Comment=*/"//"));
+  else if (Type == "ll")
+    FH.reset(new TextFileHandler(/*Comment=*/";"));
+  else if (Type == "s")
+    FH.reset(new TextFileHandler(/*Comment=*/"#"));
+  else if (Type == "bc" || Type == "o" || Type == "gch")
+    FH.reset(new BinaryFileHandler());
+
+  if (!FH.get()) {
+    Error = true;
+    llvm::errs() << "error: invalid file type specified.\n";
+  }
+
+  if (Error)
+    return 1;
+
+  // Both entry points return true on failure, which maps to exit code 1.
+  return Unbundle ? FH->Unbundle() : FH->Bundle();
+}
Index: tools/clang-offload-bundler/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/clang-offload-bundler/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_clang_executable(clang-offload-bundler
+  ClangOffloadBundler.cpp
+  )
+
+set(CLANG_OFFLOAD_BUNDLER_LIB_DEPS
+  clangBasic
+  )
+
+target_link_libraries(clang-offload-bundler
+  ${CLANG_OFFLOAD_BUNDLER_LIB_DEPS}
+  )
+
+install(TARGETS clang-offload-bundler RUNTIME DESTINATION bin)
Index: tools/Makefile
===================================================================
--- tools/Makefile
+++ tools/Makefile
@@ -12,7 +12,7 @@
 include $(CLANG_LEVEL)/../../Makefile.config
 
 DIRS := 
-PARALLEL_DIRS := clang-format driver diagtool
+PARALLEL_DIRS := clang-format clang-offload-bundler driver diagtool
 
 ifeq ($(ENABLE_CLANG_STATIC_ANALYZER), 1)
   PARALLEL_DIRS += clang-check
Index: tools/CMakeLists.txt
===================================================================
--- tools/CMakeLists.txt
+++ tools/CMakeLists.txt
@@ -3,6 +3,7 @@
 add_subdirectory(clang-format)
 add_subdirectory(clang-format-vs)
 add_subdirectory(clang-fuzzer)
+add_subdirectory(clang-offload-bundler)
 
 add_subdirectory(c-index-test)
 add_subdirectory(libclang)
Index: test/OpenMP/target_driver.c
===================================================================
--- /dev/null
+++ test/OpenMP/target_driver.c
@@ -0,0 +1,195 @@
+///
+/// Perform several driver tests for OpenMP offloading
+///
+
+/// ###########################################################################
+
+/// Check whether an invalid OpenMP target is specified:
+// RUN:   %clang -### -fopenmp=libomp -omptargets=aaa-bbb-ccc-ddd %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-INVALID-TARGET %s
+// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd'
+
+/// ###########################################################################
+
+/// Check warning for empty -omptargets
+// RUN:   %clang -### -fopenmp=libomp -omptargets=  %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s
+// CHK-EMPTY-OMPTARGETS: warning: joined argument expects additional value: '-omptargets='
+
+/// ###########################################################################
+
+/// Check the phases graph when using a single target, different from the host.
+/// The actions should be exactly the same as if no offloading was being used.
+// RUN:   %clang -ccc-print-phases -fopenmp=libomp -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES %s
+
+// CHK-PHASES-DAG: {{.*}}: linker, {[[A0:[0-9]+]]}, image
+// CHK-PHASES-DAG: [[A0]]: assembler, {[[A1:[0-9]+]]}, object
+// CHK-PHASES-DAG: [[A1]]: backend, {[[A2:[0-9]+]]}, assembler
+// CHK-PHASES-DAG: [[A2]]: compiler, {[[A3:[0-9]+]]}, ir
+// CHK-PHASES-DAG: [[A3]]: preprocessor, {[[I:[0-9]+]]}, cpp-output
+// CHK-PHASES-DAG: [[I]]: input, {{.*}}, c
+
+/// ###########################################################################
+
+/// Check the phases when using multiple targets. Again, the actions are the
+/// same as if no offloading was being used. Here we also add a library to make
+/// sure it is not treated as input.
+// RUN:   %clang -ccc-print-phases -lm -fopenmp=libomp -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-LIB %s
+
+// CHK-PHASES-LIB-DAG: {{.*}}: linker, {[[L0:[0-9]+]], [[A0:[0-9]+]]}, image
+// CHK-PHASES-LIB-DAG: [[A0]]: assembler, {[[A1:[0-9]+]]}, object
+// CHK-PHASES-LIB-DAG: [[A1]]: backend, {[[A2:[0-9]+]]}, assembler
+// CHK-PHASES-LIB-DAG: [[A2]]: compiler, {[[A3:[0-9]+]]}, ir
+// CHK-PHASES-LIB-DAG: [[A3]]: preprocessor, {[[I:[0-9]+]]}, cpp-output
+// CHK-PHASES-LIB-DAG: [[I]]: input, {{.*}}, c
+// CHK-PHASES-LIB-DAG: [[L0]]: input, "m", object
+
+/// ###########################################################################
+
+/// Check the phases when using multiple targets and passing an object file as
+/// input. An unbundling action has to be created.
+// RUN:   echo 'bla' > %t.o
+// RUN:   %clang -ccc-print-phases -lm -fopenmp=libomp -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.o 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-OBJ %s
+
+// CHK-PHASES-OBJ-DAG: {{.*}}: linker, {[[L0:[0-9]+]], [[A0:[0-9]+]], [[B0:[0-9]+]]}, image
+// CHK-PHASES-OBJ-DAG: [[A0]]: assembler, {[[A1:[0-9]+]]}, object
+// CHK-PHASES-OBJ-DAG: [[A1]]: backend, {[[A2:[0-9]+]]}, assembler
+// CHK-PHASES-OBJ-DAG: [[A2]]: compiler, {[[A3:[0-9]+]]}, ir
+// CHK-PHASES-OBJ-DAG: [[A3]]: preprocessor, {[[I:[0-9]+]]}, cpp-output
+// CHK-PHASES-OBJ-DAG: [[I]]: input, {{.*}}, c
+// CHK-PHASES-OBJ-DAG: [[L0]]: input, "m", object
+// CHK-PHASES-OBJ-DAG: [[B0]]: clang-offload-unbundler, {[[B1:[0-9]+]]}, object
+// CHK-PHASES-OBJ-DAG: [[B1]]: input, "{{.*}}.o", object
+
+/// ###########################################################################
+
+/// Check the phases when using multiple targets and separate compilation.
+// RUN:   echo 'bla' > %t.s
+// RUN:   %clang -ccc-print-phases -c -lm -fopenmp=libomp -target powerpc64-ibm-linux-gnu -omptargets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %t.s -x cpp-output %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-SEP %s
+
+// CHK-PHASES-SEP-DAG: [[A:[0-9]+]]: input, "{{.*}}.c", cpp-output
+// CHK-PHASES-SEP-DAG: [[A1:[0-9]+]]: clang-offload-unbundler, {[[A]]}, cpp-output
+// CHK-PHASES-SEP-DAG: [[A2:[0-9]+]]: compiler, {[[A1]]}, ir
+// CHK-PHASES-SEP-DAG: [[A3:[0-9]+]]: backend, {[[A2]]}, assembler
+// CHK-PHASES-SEP-DAG: [[A4:[0-9]+]]: assembler, {[[A3]]}, object
+// CHK-PHASES-SEP-DAG: {{.*}}: clang-offload-bundler, {[[A4]]}, object
+
+// CHK-PHASES-SEP-DAG: [[B:[0-9]+]]: input, "{{.*}}.s", assembler
+// CHK-PHASES-SEP-DAG: [[B1:[0-9]+]]: clang-offload-unbundler, {[[B]]}, assembler
+// CHK-PHASES-SEP-DAG: [[B2:[0-9]+]]: assembler, {[[B1]]}, object
+// CHK-PHASES-SEP-DAG: {{.*}}: clang-offload-bundler, {[[B2]]}, object
+
+/// ###########################################################################
+
+/// Check the commands passed to each tool when using valid OpenMP targets.
+/// Here we also check that offloading does not break the use of integrated
+/// assembler. It does however preclude the use of integrated preprocessor as
+/// host IR is shared by all the compile phases. There are also two offloading
+/// specific commands:
+/// -fopenmp-is-device: will tell the frontend that it will generate code for a
+/// target.
+/// -omp-host-ir-file-path: specifies the host IR file that can be loaded by
+/// the target code generation to gather information about which declarations
+/// really need to be emitted.
+///
+// RUN:   %clang -### -fopenmp=libomp -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-COMMANDS %s
+// RUN:   %clang -### -fopenmp=libomp -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-COMMANDS-ST %s
+//
+
+// Final linking - host (ppc64le)
+// CHK-COMMANDS-DAG:    ld" {{.*}}"-m" "elf64lppc" {{.*}}"-o" "a.out" {{.*}}"[[HSTOBJ:.+]].o" "-lomp" "-lomptarget" {{.*}}"-T" "[[LKSCRIPT:.+]].lk"
+// CHK-COMMANDS-ST-DAG: ld" {{.*}}"-m" "elf64lppc" {{.*}}"-o" "a.out" {{.*}}"[[HSTOBJ:.+]].o" "-lomp" "-lomptarget" {{.*}}"-T" "[[LKSCRIPT:.+]].lk"
+
+// Target 2 commands (x86_64)
+// CHK-COMMANDS-DAG:    ld" {{.*}}"-m" "elf_x86_64" {{.*}}"-shared" {{.*}}"-o" "[[T2LIB:.+]]" {{.*}}"[[T2OBJ:.+]].o" {{.*}}"-lomp"
+// CHK-COMMANDS-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ]].o" "-x" "ir" "[[T2BC:.+]].bc"
+// CHK-COMMANDS-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC]].bc" "-x" "c" "[[SRC:.+]].c" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[HSTBC:.+]].bc"
+
+// CHK-COMMANDS-ST-DAG:    ld" {{.*}}"-m" "elf_x86_64" {{.*}}"-shared" {{.*}}"-o" "[[T2LIB:.+]]" {{.*}}"[[T2OBJ:.+]].o" {{.*}}"-lomp"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ]].o" "[[T2ASM:.+]].s"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM]].s" "-x" "ir" "[[T2BC:.+]].bc"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC]].bc" "-x" "cpp-output" "[[T2PP:.+]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[HSTBC:.+]].bc"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2PP]].i" "-x" "c" "[[SRC:.+]].c"
+
+// Target 1 commands (ppc64le)
+// CHK-COMMANDS-DAG:    ld" {{.*}}"-m" "elf64lppc" {{.*}}"-shared" {{.*}}"-o" "[[T1LIB:.+]]" {{.*}}"[[T1OBJ:.+]].o" {{.*}}"-lomp"
+// CHK-COMMANDS-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ]].o" "-x" "ir" "[[T1BC:.+]].bc"
+// CHK-COMMANDS-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC]].bc" "-x" "c" "[[SRC]].c" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[HSTBC]].bc"
+
+// CHK-COMMANDS-ST-DAG:    ld" {{.*}}"-m" "elf64lppc" {{.*}}"-shared" {{.*}}"-o" "[[T1LIB:.+]]" {{.*}}"[[T1OBJ:.+]].o" {{.*}}"-lomp"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ]].o" "[[T1ASM:.+]].s"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM]].s" "-x" "ir" "[[T1BC:.+]].bc"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC]].bc" "-x" "cpp-output" "[[T1PP:.+]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[HSTBC]].bc"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1PP]].i" "-x" "c" "[[SRC]].c"
+
+// Host object generation
+// CHK-COMMANDS-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[HSTOBJ]].o" "-x" "ir" "[[HSTBC]].bc"
+// CHK-COMMANDS-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc"{{.*}}"-fopenmp" {{.*}}"-o" "[[HSTBC]].bc" "-x" "c" "[[SRC]].c" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[HSTOBJ]].o" "[[HSTASM:.+]].s"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S"{{.*}}"-fopenmp" {{.*}}"-o" "[[HSTASM]].s" "-x" "ir" "[[HSTBC:.+]].bc"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc"{{.*}}"-fopenmp" {{.*}}"-o" "[[HSTBC]].bc" "-x" "cpp-output" "[[HSTPP:.+]].i" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-COMMANDS-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E"{{.*}}"-fopenmp" {{.*}}"-o" "[[HSTPP]].i" "-x" "c" "[[SRC]].c"
+
+/// ###########################################################################
+
+/// Check separate compilation
+///
+// RUN:   echo 'bla' > %t.s
+// RUN:   %clang -### -fopenmp=libomp -c -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.s -x cpp-output %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-COMMANDS-SEP %s
+// RUN:   %clang -### -fopenmp=libomp -c -target powerpc64le-linux -omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.s -x cpp-output %s -save-temps 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-COMMANDS-SEP-ST %s
+//
+
+// Unbundle the input files.
+// CHK-COMMANDS-SEP-DAG:    clang-offload-bundler{{.*}}" "-type=s" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-inputs=[[AAASM:.+]].s" "-outputs=[[AAHASM:.+]].s,[[AAT1ASM:.+]].s,[[AAT2ASM:.+]].s" "-unbundle"
+// CHK-COMMANDS-SEP-DAG:    clang-offload-bundler{{.*}}" "-type=i" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-inputs=[[BBPP:.+]].c" "-outputs=[[BBHPP:.+]].i,[[BBT1PP:.+]].i,[[BBT2PP:.+]].i" "-unbundle"
+
+// CHK-COMMANDS-SEP-ST-DAG:    clang-offload-bundler{{.*}}" "-type=s" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-inputs=[[AAASM:.+]].s" "-outputs=[[AAHASM:.+]].s,[[AAT1ASM:.+]].s,[[AAT2ASM:.+]].s" "-unbundle"
+// CHK-COMMANDS-SEP-ST-DAG:    clang-offload-bundler{{.*}}" "-type=i" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-inputs=[[BBPP:.+]].c" "-outputs=[[BBHPP:.+]].i,[[BBT1PP:.+]].i,[[BBT2PP:.+]].i" "-unbundle"
+
+// Create 1st bundle.
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[AAHOBJ:.+]].o" "[[AAHASM]].s"
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[AAT1OBJ:.+]].o" "[[AAT1ASM]].s"
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[AAT2OBJ:.+]].o" "[[AAT2ASM]].s"
+// CHK-COMMANDS-SEP-DAG:    clang-offload-bundler{{.*}}" "-type=o" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-outputs=[[AAOBJ:.+]].o" "-inputs=[[AAHOBJ]].o,[[AAT1OBJ]].o,[[AAT2OBJ]].o"
+
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[AAHOBJ:.+]].o" "[[AAHASM]].s"
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[AAT1OBJ:.+]].o" "[[AAT1ASM]].s"
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[AAT2OBJ:.+]].o" "[[AAT2ASM]].s"
+// CHK-COMMANDS-SEP-ST-DAG:    clang-offload-bundler{{.*}}" "-type=o" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-outputs=[[AAOBJ:.+]].o" "-inputs=[[AAHOBJ]].o,[[AAT1OBJ]].o,[[AAT2OBJ]].o"
+
+// Create 2nd bundle.
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc"{{.*}}"-fopenmp" {{.*}}"-o" "[[BBHBC:.+]].bc" "-x" "cpp-output" "[[BBHPP]].i" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBHOBJ:.+]].o" "-x" "ir" "[[BBHBC]].bc"
+
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc"{{.*}}"-fopenmp" {{.*}}"-o" "[[BBHBC:.+]].bc" "-x" "cpp-output" "[[BBHPP]].i" "-omptargets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBHASM:.+]].s" "-x" "ir" "[[BBHBC]].bc"
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[BBHOBJ:.+]].o" "[[BBHASM]].s"
+
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT1BC:.+]].bc" "-x" "cpp-output" "[[BBT1PP]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[BBHBC]].bc"
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT1OBJ:.+]].o" "-x" "ir" "[[BBT1BC]].bc"
+
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT1BC:.+]].bc" "-x" "cpp-output" "[[BBT1PP]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[BBHBC]].bc"
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT1ASM:.+]].s" "-x" "ir" "[[BBT1BC]].bc"
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[BBT1OBJ:.+]].o" "[[BBT1ASM]].s"
+
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT2BC:.+]].bc" "-x" "cpp-output" "[[BBT2PP]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[BBHBC]].bc"
+// CHK-COMMANDS-SEP-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT2OBJ:.+]].o" "-x" "ir" "[[BBT2BC]].bc"
+
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT2BC:.+]].bc" "-x" "cpp-output" "[[BBT2PP]].i" "-fopenmp-is-device" "-omp-host-ir-file-path" "[[BBHBC]].bc"
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[BBT2ASM:.+]].s" "-x" "ir" "[[BBT2BC]].bc"
+// CHK-COMMANDS-SEP-ST-DAG:    clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[BBT2OBJ:.+]].o" "[[BBT2ASM]].s"
+
+// CHK-COMMANDS-SEP-DAG:     clang-offload-bundler{{.*}}" "-type=o" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-outputs=[[BBOBJ:.+]].o" "-inputs=[[BBHOBJ]].o,[[BBT1OBJ]].o,[[BBT2OBJ]].o"
+// CHK-COMMANDS-SEP-ST-DAG:  clang-offload-bundler{{.*}}" "-type=o" "-targets=offload-host-powerpc64le--linux,offload-device-powerpc64le-ibm-linux-gnu,offload-device-x86_64-pc-linux-gnu" "-outputs=[[BBOBJ:.+]].o" "-inputs=[[BBHOBJ]].o,[[BBT1OBJ]].o,[[BBT2OBJ]].o"
+
+
+
Index: lib/Driver/Types.cpp
===================================================================
--- lib/Driver/Types.cpp
+++ lib/Driver/Types.cpp
@@ -140,6 +140,10 @@
   }
 }
 
+bool types::isSrcFile(ID Id) {
+  return Id != TY_Object && getPreprocessedType(Id) != TY_INVALID;
+}
+
 types::ID types::lookupTypeForExtension(const char *Ext) {
   return llvm::StringSwitch<types::ID>(Ext)
            .Case("c", TY_C)
Index: lib/Driver/Tools.h
===================================================================
--- lib/Driver/Tools.h
+++ lib/Driver/Tools.h
@@ -128,6 +128,19 @@
                     const char *LinkingOutput) const override;
 };
 
+/// \brief Offload bundler tool.
+class LLVM_LIBRARY_VISIBILITY OffloadBundler : public Tool {
+public:
+  OffloadBundler(const ToolChain &TC)
+      : Tool("Offload bundler", "clang-offload-bundler", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+
 /// \brief Base class for all GNU tools that provide the same behavior when
 /// it comes to response files support
 class LLVM_LIBRARY_VISIBILITY GnuTool : public Tool {
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -179,12 +179,24 @@
 static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
                             const ArgList &Args, ArgStringList &CmdArgs) {
   const Driver &D = TC.getDriver();
+  unsigned NumberOfInputs = Inputs.size();
+
+  // If the current toolchain is an OpenMP host toolchain, we need to ignore
+  // the last inputs - one for each offloading device - as they are going to be
+  // embedded in the fat binary by a custom linker script.
+  if (TC.getOffloadingKind() == ToolChain::OK_OpenMP_Host) {
+    Arg *Tgts = Args.getLastArg(options::OPT_omptargets_EQ);
+    assert(Tgts && Tgts->getNumValues() &&
+           "OpenMP offloading has to have targets specified.");
+    NumberOfInputs -= Tgts->getNumValues();
+  }
 
   // Add extra linker input arguments which are not treated as inputs
   // (constructed via -Xarch_).
   Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
 
-  for (const auto &II : Inputs) {
+  for (unsigned i = 0; i < NumberOfInputs; ++i) {
+    const auto &II = Inputs[i];
     if (!TC.HasNativeLLVMSupport()) {
       // Don't try to pass LLVM inputs unless we have native support.
       if (II.getType() == types::TY_LLVM_IR ||
@@ -222,6 +234,98 @@
     addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
 }
 
+/// \brief Add OpenMP linker script arguments at the end of the argument list
+/// so that the fat binary is built by embedding each of the device images into
+/// the host. The device images are the last inputs, one for each device and
+/// come in the same order the triples are passed through the omptargets option.
+/// The linker script also defines a few symbols required by the code generation
+/// so that the images can be easily retrieved at runtime by the offloading
+/// library. This should be used in tool chains that support linker scripts.
+static void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
+                                  const InputInfo &Output,
+                                  const InputInfoList &Inputs,
+                                  const ArgList &Args, ArgStringList &CmdArgs) {
+
+  // If this is not an OpenMP host toolchain, we don't need to do anything.
+  if (TC.getOffloadingKind() != ToolChain::OK_OpenMP_Host)
+    return;
+
+  // Gather the pairs (target triple)-(file name). The file names are at the
+  // end of the input list. So we do a reverse scanning.
+  SmallVector<std::pair<llvm::Triple, const char *>, 4> Targets;
+
+  Arg *Tgts = Args.getLastArg(options::OPT_omptargets_EQ);
+  assert(Tgts && Tgts->getNumValues() &&
+         "OpenMP offloading has to have targets specified.");
+
+  auto TriplesIt = Tgts->getValues().end();
+  auto FileNamesIt = Inputs.end();
+  for (unsigned i = 0; i < Tgts->getNumValues(); ++i) {
+    --TriplesIt;
+    --FileNamesIt;
+    Targets.push_back(
+        std::make_pair(llvm::Triple(*TriplesIt), FileNamesIt->getFilename()));
+  }
+
+  // Create temporary linker script
+  StringRef Name = llvm::sys::path::filename(Output.getFilename());
+  std::pair<StringRef, StringRef> Split = Name.rsplit('.');
+  std::string TmpName = C.getDriver().GetTemporaryPath(Split.first, "lk");
+  const char *LKS = C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
+
+  // Open script file in order to write contents
+  std::error_code EC;
+  llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None);
+
+  if (EC) {
+    C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
+    return;
+  }
+
+  // Add commands to embed target binaries. We ensure that each section and
+  // image is 16-byte aligned. This is not mandatory, but increases the
+  // likelihood of data to be aligned with a cache block in several main host
+  // machines.
+  Lksf << "TARGET(binary)\n";
+  for (unsigned i = 0; i < Targets.size(); ++i)
+    Lksf << "INPUT(" << Targets[i].second << ")\n";
+
+  Lksf << "SECTIONS\n";
+  Lksf << "{\n";
+  Lksf << "  .omp_offloading :\n";
+  Lksf << "  ALIGN(0x10)\n";
+  Lksf << "  {\n";
+
+  for (unsigned i = 0; i < Targets.size(); ++i) {
+    std::string TgtName(Targets[i].first.getTriple());
+    // std::replace(TgtName.begin(), TgtName.end(), '-', '_');
+    Lksf << "    . = ALIGN(0x10);\n";
+    Lksf << "    PROVIDE_HIDDEN(.omp_offloading.img_start." << TgtName
+         << " = .);\n";
+    Lksf << "    " << Targets[i].second << "\n";
+    Lksf << "    PROVIDE_HIDDEN(.omp_offloading.img_end." << TgtName
+         << " = .);\n";
+  }
+
+  Lksf << "  }\n";
+  // Add commands to define host entries begin and end
+  Lksf << "  .omp_offloading.entries :\n";
+  Lksf << "  ALIGN(0x10)\n";
+  Lksf << "  SUBALIGN(0x01)\n";
+  Lksf << "  {\n";
+  Lksf << "    PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n";
+  Lksf << "    *(.omp_offloading.entries)\n";
+  Lksf << "    PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n";
+  Lksf << "  }\n";
+  Lksf << "}\n";
+  Lksf << "INSERT BEFORE .data\n";
+
+  Lksf.close();
+
+  CmdArgs.push_back("-T");
+  CmdArgs.push_back(LKS);
+}
+
 /// \brief Determine whether Objective-C automated reference counting is
 /// enabled.
 static bool isObjCAutoRefCount(const ArgList &Args) {
@@ -3157,10 +3261,16 @@
   assert(Inputs.size() >= 1 && "Must have at least one input.");
   const InputInfo &Input = Inputs[0];
   // CUDA compilation may have multiple inputs (source file + results of
-  // device-side compilations). All other jobs are expected to have exactly one
-  // input.
+  // device-side compilations). OpenMP offloading device compile jobs also take
+  // the host IR as an extra input. All other jobs are expected to have exactly
+  // one input.
   bool IsCuda = types::isCuda(Input.getType());
-  assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs.");
+  bool IsOpenMPDeviceCompileJob =
+      isa<CompileJobAction>(JA) &&
+      getToolChain().getOffloadingKind() == ToolChain::OK_OpenMP_Device;
+  assert((IsCuda || (IsOpenMPDeviceCompileJob && Inputs.size() == 2) ||
+          Inputs.size() == 1) &&
+         "Unable to handle multiple inputs.");
 
   // Invoke ourselves in -cc1 mode.
   //
@@ -5120,6 +5230,37 @@
       CmdArgs.push_back(I->getFilename());
     }
 
+  // OpenMP offloading device jobs take the argument -omp-host-ir-file-path
+  // to specify the result of the compile phase on the host, so the meaningful
+  // device declarations can be identified. Also, -fopenmp-is-device is passed
+  // along to tell the frontend that it is generating code for a device, so that
+  // only the relevant declarations are emitted.
+  if (IsOpenMPDeviceCompileJob) {
+    CmdArgs.push_back("-fopenmp-is-device");
+    CmdArgs.push_back("-omp-host-ir-file-path");
+    CmdArgs.push_back(Args.MakeArgString(Inputs.back().getFilename()));
+  }
+
+  // For all the host OpenMP offloading compile jobs we need to pass the targets
+  // information using -omptargets= option.
+  if (isa<CompileJobAction>(JA) &&
+      getToolChain().getOffloadingKind() == ToolChain::OK_OpenMP_Host) {
+    SmallString<128> TargetInfo("-omptargets=");
+
+    Arg *Tgts = Args.getLastArg(options::OPT_omptargets_EQ);
+    assert(Tgts && Tgts->getNumValues() &&
+           "OpenMP offloading has to have targets specified.");
+    for (unsigned i = 0; i < Tgts->getNumValues(); ++i) {
+      if (i)
+        TargetInfo += ',';
+      // We need to get the string from the triple because it may not be exactly
+      // the same as the one we get directly from the arguments.
+      llvm::Triple T(Tgts->getValue(i));
+      TargetInfo += T.getTriple();
+    }
+    CmdArgs.push_back(Args.MakeArgString(TargetInfo.str()));
+  }
+
   // Finally add the compile command to the compilation.
   if (Args.hasArg(options::OPT__SLASH_fallback) &&
       Output.getType() == types::TY_Object &&
@@ -5659,6 +5800,72 @@
                    SplitDebugName(Args, Input));
 }
 
+void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
+                                  const InputInfo &Output,
+                                  const InputInfoList &Inputs,
+                                  const llvm::opt::ArgList &TCArgs,
+                                  const char *LinkingOutput) const {
+
+  // The (un)bundling command looks like this:
+  // clang-offload-bundler -type=bc
+  //   -targets=offload-host-<triple>,offload-device-<triple1>,...
+  //   -inputs=input_file
+  //   -outputs=unbundle_file_host,unbundle_file_tgt1,unbundle_file_tgt2
+  //   (-unbundle)
+
+  auto BundledFile = Output;
+  auto UnbundledFiles = Inputs;
+
+  bool IsUnbundle = isa<OffloadUnbundlingJobAction>(JA);
+
+  ArgStringList CmdArgs;
+
+  // Get the type.
+  CmdArgs.push_back(TCArgs.MakeArgString(
+      Twine("-type=") + types::getTypeTempSuffix(BundledFile.getType())));
+
+  // Get the triples. The order is the same as in the -omptargets option.
+  {
+    SmallString<128> Triples;
+    Triples += "-targets=offload-host-";
+    Triples += getToolChain().getTripleString();
+
+    Arg *TargetsArg = TCArgs.getLastArg(options::OPT_omptargets_EQ);
+    for (auto *A : TargetsArg->getValues()) {
+      // We have to use the string that exactly matches the triple here.
+      llvm::Triple T(A);
+      Triples += ",offload-device-";
+      Triples += T.getTriple();
+    }
+    CmdArgs.push_back(TCArgs.MakeArgString(Triples));
+  }
+
+  // Get bundled file command.
+  CmdArgs.push_back(
+      TCArgs.MakeArgString(Twine(IsUnbundle ? "-inputs=" : "-outputs=") +
+                           BundledFile.getFilename()));
+
+  // Get unbundled files command.
+  {
+    SmallString<128> UB(IsUnbundle ? "-outputs=" : "-inputs=");
+    for (unsigned i = 0; i < UnbundledFiles.size(); ++i) {
+      if (i)
+        UB += ',';
+      UB += UnbundledFiles[i].getFilename();
+    }
+    CmdArgs.push_back(TCArgs.MakeArgString(UB));
+  }
+
+  if (IsUnbundle)
+    CmdArgs.push_back("-unbundle");
+
+  // All the inputs are encoded as commands.
+  C.addCommand(llvm::make_unique<Command>(
+      JA, *this,
+      TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
+      CmdArgs, None));
+}
+
 void GnuTool::anchor() {}
 
 void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
@@ -8452,6 +8659,8 @@
           // Already diagnosed.
           break;
         }
+        if (getToolChain().getOffloadingKind() == ToolChain::OK_OpenMP_Host)
+          CmdArgs.push_back("-lomptarget");
       }
 
       AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
@@ -8482,6 +8691,9 @@
     }
   }
 
+  // Add OpenMP offloading linker script args if required.
+  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs);
+
   C.addCommand(llvm::make_unique<Command>(JA, *this, ToolChain.Linker.c_str(),
                                           CmdArgs, Inputs));
 }
Index: lib/Driver/ToolChains.h
===================================================================
--- lib/Driver/ToolChains.h
+++ lib/Driver/ToolChains.h
@@ -201,6 +201,9 @@
   bool isPIEDefault() const override;
   bool isPICDefaultForced() const override;
   bool IsIntegratedAssemblerDefault() const override;
+  llvm::opt::DerivedArgList *
+  TranslateOffloadArgs(const llvm::opt::DerivedArgList &Args,
+                       const char *BoundArch) const override;
 
 protected:
   Tool *getTool(Action::ActionClass AC) const override;
Index: lib/Driver/ToolChains.cpp
===================================================================
--- lib/Driver/ToolChains.cpp
+++ lib/Driver/ToolChains.cpp
@@ -2167,6 +2167,46 @@
   }
 }
 
+llvm::opt::DerivedArgList *
+Generic_GCC::TranslateOffloadArgs(const llvm::opt::DerivedArgList &Args,
+                                  const char *BoundArch) const {
+  // Make sure we always generate a shared library for an OpenMP offloading
+  // target regardless of the commands the user passed to the host.
+
+  if (getOffloadingKind() != OK_OpenMP_Device)
+    return nullptr;
+
+  DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
+  const OptTable &Opts = getDriver().getOpts();
+
+  // Request the shared library.
+  DAL->AddFlagArg(0, Opts.getOption(options::OPT_shared));
+  DAL->AddFlagArg(0, Opts.getOption(options::OPT_fPIC));
+
+  // Filter out the arguments we do not want to pass to the offloading
+  // toolchain, as they can interfere with the creation of a shared library.
+  for (auto *A : Args) {
+    switch ((options::ID)A->getOption().getID()) {
+    default:
+      DAL->append(A);
+      break;
+    case options::OPT_shared:
+    case options::OPT_static:
+    case options::OPT_fPIC:
+    case options::OPT_fno_PIC:
+    case options::OPT_fpic:
+    case options::OPT_fno_pic:
+    case options::OPT_fPIE:
+    case options::OPT_fno_PIE:
+    case options::OPT_fpie:
+    case options::OPT_fno_pie:
+      break;
+    }
+  }
+
+  return DAL;
+}
+
 void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
                                         ArgStringList &CC1Args) const {
   const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
Index: lib/Driver/ToolChain.cpp
===================================================================
--- lib/Driver/ToolChain.cpp
+++ lib/Driver/ToolChain.cpp
@@ -65,7 +65,8 @@
 ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
                      const ArgList &Args)
     : D(D), Triple(T), Args(Args), CachedRTTIArg(GetRTTIArgument(Args)),
-      CachedRTTIMode(CalculateRTTIMode(Args, Triple, CachedRTTIArg)) {
+      CachedRTTIMode(CalculateRTTIMode(Args, Triple, CachedRTTIArg)),
+      CachedOffloadingKind(OK_None) {
   if (Arg *A = Args.getLastArg(options::OPT_mthread_model))
     if (!isThreadModelSupported(A->getValue()))
       D.Diag(diag::err_drv_invalid_thread_model_for_target)
@@ -182,6 +183,12 @@
   return std::make_pair(Target, ModeFlag);
 }
 
+void ToolChain::setOffloadingKind(OffloadingKind OK) {
+  assert(CachedOffloadingKind == OK_None &&
+         "Offloading kind not expected to change once it is set.");
+  CachedOffloadingKind = OK;
+}
+
 StringRef ToolChain::getDefaultUniversalArchName() const {
   // In universal driver terms, the arch name accepted by -arch isn't exactly
   // the same as the ones that appear in the triple. Roughly speaking, this is
@@ -235,6 +242,12 @@
   return Link.get();
 }
 
+Tool *ToolChain::getOffloadBundler() const {
+  if (!OffloadBundler)
+    OffloadBundler.reset(new tools::OffloadBundler(*this));
+  return OffloadBundler.get();
+}
+
 Tool *ToolChain::getTool(Action::ActionClass AC) const {
   switch (AC) {
   case Action::AssembleJobClass:
@@ -260,6 +273,10 @@
   case Action::VerifyPCHJobClass:
   case Action::BackendJobClass:
     return getClang();
+
+  case Action::OffloadBundlingJobClass:
+  case Action::OffloadUnbundlingJobClass:
+    return getOffloadBundler();
   }
 
   llvm_unreachable("Invalid tool kind.");
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -83,6 +83,7 @@
   delete Opts;
 
   llvm::DeleteContainerSeconds(ToolChains);
+  llvm::DeleteContainerSeconds(OffloadToolChains);
 }
 
 void Driver::ParseDriverMode(ArrayRef<const char *> Args) {
@@ -138,7 +139,10 @@
     }
 
     // Warn about -mcpu= without an argument.
-    if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) {
+    if ((A->getOption().matches(options::OPT_mcpu_EQ) &&
+         A->containsValue("")) ||
+        (A->getOption().matches(options::OPT_omptargets_EQ) &&
+         !A->getNumValues())) {
       Diag(clang::diag::warn_drv_empty_joined_argument) << A->getAsString(Args);
     }
   }
@@ -194,6 +198,251 @@
   return FinalPhase;
 }
 
+/// \brief Return true if the provided arguments require OpenMP offloading.
+static bool RequiresOpenMPOffloading(ArgList &Args) {
+  if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
+                   options::OPT_fno_openmp, false)) {
+    StringRef OpenMPRuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME);
+    if (const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ))
+      OpenMPRuntimeName = A->getValue();
+
+    if (OpenMPRuntimeName == "libomp" || OpenMPRuntimeName == "libiomp5") {
+      auto *A = Args.getLastArg(options::OPT_omptargets_EQ);
+      return A != nullptr && A->getNumValues();
+    }
+  }
+  return false;
+}
+/// \brief Return true if the provided tool chain requires OpenMP offloading.
+static bool RequiresOpenMPOffloading(const ToolChain *TC) {
+  return TC->getOffloadingKind() == ToolChain::OK_OpenMP_Host ||
+         TC->getOffloadingKind() == ToolChain::OK_OpenMP_Device;
+}
+
+/// \brief Dump the job bindings for a given action.
+static void DumpJobBindings(ArrayRef<const ToolChain *> TCs, StringRef ToolName,
+                            ArrayRef<InputInfo> Inputs,
+                            ArrayRef<InputInfo> Outputs) {
+
+  llvm::errs() << "# \"";
+  for (unsigned i = 0, e = TCs.size(); i != e; ++i) {
+    llvm::errs() << TCs[i]->getTripleString();
+    if (i + 1 != e)
+      llvm::errs() << ", ";
+  }
+
+  llvm::errs() << "\" - \"" << ToolName << "\", inputs: [";
+  for (unsigned i = 0, e = Inputs.size(); i != e; ++i) {
+    llvm::errs() << Inputs[i].getAsString();
+    if (i + 1 != e)
+      llvm::errs() << ", ";
+  }
+  llvm::errs() << "], ";
+  llvm::errs() << ((Outputs.size() > 1) ? "outputs: [" : "output: ");
+  for (unsigned i = 0, e = Outputs.size(); i != e; ++i) {
+    llvm::errs() << Outputs[i].getAsString();
+    if (i + 1 != e)
+      llvm::errs() << ", ";
+  }
+  llvm::errs() << ((Outputs.size() > 1) ? "]\n" : "\n");
+  return;
+}
+
+/// \brief Create output for a given action, if any.
+static InputInfo CreateActionResult(Compilation &C, const Action *A,
+                                    const char *BaseInput,
+                                    const char *BoundArch, bool AtTopLevel,
+                                    bool MultipleArchs) {
+  InputInfo Result;
+  const JobAction *JA = cast<JobAction>(A);
+  if (JA->getType() == types::TY_Nothing)
+    Result = InputInfo(A->getType(), BaseInput);
+  else
+    Result =
+        InputInfo(C.getDriver().GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
+                                                   AtTopLevel, MultipleArchs),
+                  A->getType(), BaseInput);
+  return Result;
+}
+
+static const char *CreateOffloadingPseudoArchName(Compilation &C,
+                                                  const ToolChain *TC) {
+  SmallString<128> Name;
+  switch (TC->getOffloadingKind()) {
+  default:
+    llvm_unreachable("Offload information was not specified.");
+    break;
+  case ToolChain::OK_OpenMP_Host:
+    Name = "offload-host-";
+    break;
+  case ToolChain::OK_OpenMP_Device:
+    Name = "offload-device-";
+    break;
+  }
+
+  Name += TC->getTripleString();
+  return C.getArgs().MakeArgString(Name.str());
+}
+
+InputInfo Driver::CreateUnbundledOffloadingResult(
+    Compilation &C, const OffloadUnbundlingJobAction *CurAction,
+    const ToolChain *TC, InputInfo Result,
+    OffloadingHostResultsTy &OffloadingHostResults) const {
+  assert(!OrderedOffloadingToolchains.empty() &&
+         !types::isSrcFile(Result.getType()) &&
+         "Not expecting to create a bundling action!");
+
+  // If this is an offloading device toolchain, we need to use the results
+  // cached when the host input was processed, except if the input is a source
+  // file.
+  if (TC->getOffloadingKind() == ToolChain::OK_OpenMP_Device) {
+    // If this is not a source file, it had to be part of a bundle. So we need
+    // to look up the results created by the host when this input was processed
+    // for the host toolchain.
+    auto ILIt = OffloadingHostResults.find(CurAction);
+    assert(ILIt != OffloadingHostResults.end() &&
+           "Offloading inputs do not exist??");
+    InputInfoList &IL = ILIt->getSecond();
+    assert(IL.size() == OrderedOffloadingToolchains.size() + 1 &&
+           "Not all offloading inputs exist??");
+
+    // Get the order of the toolchain and retrieve the input.
+    unsigned Order = 1;
+    for (auto *OffloadTC : OrderedOffloadingToolchains) {
+      if (OffloadTC == TC)
+        break;
+      ++Order;
+    }
+    return IL[Order];
+  }
+
+  // Otherwise, this input is expected to be bundled. Therefore we need to issue
+  // an unbundling command.
+
+  // The bundled file is the input.
+  InputInfo BundledFile = Result;
+
+  // Create the input info for the unbundled files.
+  InputInfoList &UnbundledFiles = OffloadingHostResults[CurAction];
+  {
+    InputInfo HostResult = CreateActionResult(
+        C, CurAction, Result.getBaseInput(),
+        CreateOffloadingPseudoArchName(C, TC), /*AtTopLevel=*/
+        false, /*MultipleArchs=*/false);
+    UnbundledFiles.push_back(HostResult);
+    for (auto *OffloadTC : OrderedOffloadingToolchains) {
+      InputInfo TargetResult = CreateActionResult(
+          C, CurAction, Result.getBaseInput(),
+          CreateOffloadingPseudoArchName(C, OffloadTC), /*AtTopLevel=*/
+          false, /*MultipleArchs=*/false);
+      UnbundledFiles.push_back(TargetResult);
+    }
+  }
+
+  auto OffloadBundlerTool = TC->SelectTool(*CurAction);
+
+  // Emit the command or dump the bindings.
+  if (CCCPrintBindings && !CCGenDiagnostics) {
+    SmallVector<const ToolChain *, 4> AllToolChains;
+    AllToolChains.push_back(TC);
+    AllToolChains.append(OrderedOffloadingToolchains.begin(),
+                         OrderedOffloadingToolchains.end());
+    DumpJobBindings(AllToolChains, OffloadBundlerTool->getName(), BundledFile,
+                    UnbundledFiles);
+  } else {
+    OffloadBundlerTool->ConstructJob(C, *CurAction, BundledFile, UnbundledFiles,
+                                     C.getArgs(), nullptr);
+  }
+
+  // The host result is the first of the unbundled files.
+  return UnbundledFiles.front();
+}
+
+InputInfo Driver::CreateBundledOffloadingResult(
+    Compilation &C, const OffloadBundlingJobAction *CurAction,
+    const ToolChain *TC, InputInfoList Results) const {
+  assert(!OrderedOffloadingToolchains.empty() &&
+         "Not expecting to create a bundling action!");
+
+  // Get the result file based on BaseInput file name and the previous host
+  // action.
+  InputInfo BundledFile = CreateActionResult(
+      C, *CurAction->begin(), Results[0].getBaseInput(), /*BoundArch=*/nullptr,
+      /*AtTopLevel=*/true, /*MultipleArchs=*/false);
+
+  // The unbundled files are the previous action result for each target.
+  InputInfoList &UnbundledFiles = Results;
+
+  // Create the bundling command.
+  auto OffloadBundlerTool = TC->SelectTool(*CurAction);
+
+  // Emit the command or dump the bindings.
+  if (CCCPrintBindings && !CCGenDiagnostics) {
+    SmallVector<const ToolChain *, 4> AllToolChains;
+    AllToolChains.push_back(TC);
+    AllToolChains.append(OrderedOffloadingToolchains.begin(),
+                         OrderedOffloadingToolchains.end());
+    DumpJobBindings(AllToolChains, OffloadBundlerTool->getName(),
+                    UnbundledFiles, BundledFile);
+  } else {
+    OffloadBundlerTool->ConstructJob(C, *CurAction, BundledFile, UnbundledFiles,
+                                     C.getArgs(), nullptr);
+  }
+
+  return BundledFile;
+}
+
+void Driver::PostProcessOffloadingInputsAndResults(
+    Compilation &C, const JobAction *JA, const ToolChain *TC,
+    InputInfoList &Inputs, InputInfo &Result,
+    OffloadingHostResultsTy &OffloadingHostResults) const {
+
+  // If this driver run requires OpenMP offloading we need to make sure
+  // everything gets combined at link time. Also, all the compile phase results
+  // obtained for the host are used as inputs in the device side.
+  if (RequiresOpenMPOffloading(TC)) {
+
+    if (isa<LinkJobAction>(JA) &&
+        TC->getOffloadingKind() == ToolChain::OK_OpenMP_Host) {
+      // Get link results for all targets.
+      InputInfoList TgtLinkResults(OrderedOffloadingToolchains.size());
+      for (unsigned i = 0; i < OrderedOffloadingToolchains.size(); ++i) {
+        const ToolChain *TgtTC = OrderedOffloadingToolchains[i];
+        BuildJobsForAction(C, JA, TgtTC,
+                           CreateOffloadingPseudoArchName(C, TgtTC),
+                           /*AtTopLevel=*/false,
+                           /*MultipleArchs=*/true, /*LinkingOutput=*/nullptr,
+                           TgtLinkResults[i], OffloadingHostResults);
+      }
+      Inputs.append(TgtLinkResults.begin(), TgtLinkResults.end());
+      return;
+    }
+
+    if (isa<CompileJobAction>(JA) &&
+        TC->getOffloadingKind() == ToolChain::OK_OpenMP_Device) {
+      // Find the host compile result.
+      auto ILIt = OffloadingHostResults.find(JA);
+      assert(ILIt != OffloadingHostResults.end() &&
+             "The OpenMP host side action is expected to be processed before!");
+      InputInfoList &IL = ILIt->getSecond();
+      assert(IL.size() == 1 && "Host compile results should only be one!");
+      Inputs.push_back(IL.front());
+      return;
+    }
+
+    // If this is a host action, make sure it is recorded in the offloading
+    // results cache.
+    if (TC->getOffloadingKind() == ToolChain::OK_OpenMP_Host)
+      OffloadingHostResults[JA].push_back(Result);
+
+    return;
+  }
+
+  //
+  // Add post-processing code for other offloading implementations here.
+  //
+}
+
 static Arg *MakeInputArg(DerivedArgList &Args, OptTable *Opts,
                          StringRef Value) {
   Arg *A = new Arg(Opts->getOption(options::OPT_INPUT), Value,
@@ -454,9 +703,44 @@
   // Perform the default argument translations.
   DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
 
+  // Check if we need offloading support by the toolchains.
+  ToolChain::OffloadingKind HostOffloadingKind = ToolChain::OK_None;
+  ToolChain::OffloadingKind DeviceOffloadingKind = ToolChain::OK_None;
+  // Check if we need OpenMP offloading
+  if (RequiresOpenMPOffloading(*UArgs)) {
+    HostOffloadingKind = ToolChain::OK_OpenMP_Host;
+    DeviceOffloadingKind = ToolChain::OK_OpenMP_Device;
+  }
+
   // Owned by the host.
   const ToolChain &TC =
-      getToolChain(*UArgs, computeTargetTriple(DefaultTargetTriple, *UArgs));
+      getToolChain(*UArgs, computeTargetTriple(DefaultTargetTriple, *UArgs),
+                   HostOffloadingKind);
+
+  // Get the toolchains for the offloading targets if any. We need to read the
+  // offloading toolchains only if we have a compatible runtime library, and
+  // that would be either libomp or libiomp.
+  OrderedOffloadingToolchains.clear();
+
+  if (DeviceOffloadingKind == ToolChain::OK_OpenMP_Device) {
+    Arg *Tgts = UArgs->getLastArg(options::OPT_omptargets_EQ);
+    assert(Tgts && Tgts->getNumValues() &&
+           "OpenMP offloading has to have targets specified.");
+
+    for (unsigned v = 0; v < Tgts->getNumValues(); ++v) {
+      const char *Val = Tgts->getValue(v);
+      llvm::Triple TT(Val);
+
+      // If the specified target is invalid, emit error
+      if (TT.getArch() == llvm::Triple::UnknownArch)
+        Diag(clang::diag::err_drv_invalid_omp_target) << Val;
+      else {
+        const ToolChain &OffloadTC =
+            getToolChain(*UArgs, TT, DeviceOffloadingKind);
+        OrderedOffloadingToolchains.push_back(&OffloadTC);
+      }
+    }
+  }
 
   // The compilation takes ownership of Args.
   Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
@@ -1445,6 +1729,15 @@
 
     // Build the pipeline for this file.
     std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType));
+
+    // If we need to support offloading, run an unbundling job before each input
+    // to make sure that bundled files get unbundled. This is not required if
+    // the input is a source file.
+    if (!OrderedOffloadingToolchains.empty() &&
+        InputArg->getOption().getKind() == llvm::opt::Option::InputClass &&
+        !types::isSrcFile(InputType))
+      Current.reset(new OffloadUnbundlingJobAction(std::move(Current)));
+
     for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
          i != e; ++i) {
       phases::ID Phase = *i;
@@ -1481,8 +1774,15 @@
     }
 
     // If we ended with something, add to the output list.
-    if (Current)
+    if (Current) {
+      // If we need to support offloading, run a bundling job for each output
+      // that is not a linker action. The link step is where device images are
+      // usually embedded into the host to form a fat binary.
+      if (!OrderedOffloadingToolchains.empty())
+        Current.reset(new OffloadBundlingJobAction(std::move(Current)));
+
       Actions.push_back(Current.release());
+    }
   }
 
   // Add a link action if necessary.
@@ -1617,6 +1917,10 @@
       if (A->getOption().matches(options::OPT_arch))
         ArchNames.insert(A->getValue());
 
+  // Clean up the offloading host cache so that cached results of previous runs
+  // are not used. This is required when clang is used as a library.
+  OffloadingHostResultsTy OffloadingHostResults;
+
   for (Action *A : C.getActions()) {
     // If we are linking an image for multiple archs then the linker wants
     // -arch_multiple and -final_output <final image name>. Unfortunately, this
@@ -1637,7 +1941,8 @@
                        /*BoundArch*/ nullptr,
                        /*AtTopLevel*/ true,
                        /*MultipleArchs*/ ArchNames.size() > 1,
-                       /*LinkingOutput*/ LinkingOutput, II);
+                       /*LinkingOutput*/ LinkingOutput, II,
+                       OffloadingHostResults);
   }
 
   // If the user passed -Qunused-arguments or there were errors, don't warn
@@ -1708,28 +2013,35 @@
     // A BackendJob is always preceded by a CompileJob, and without
     // -save-temps they will always get combined together, so instead of
     // checking the backend tool, check if the tool for the CompileJob
-    // has an integrated assembler.
-    const ActionList *BackendInputs = &(*Inputs)[0]->getInputs();
-    // Compile job may be wrapped in CudaHostAction, extract it if
-    // that's the case and update CollapsedCHA if we combine phases.
-    CudaHostAction *CHA = dyn_cast<CudaHostAction>(*BackendInputs->begin());
-    JobAction *CompileJA =
-        cast<CompileJobAction>(CHA ? *CHA->begin() : *BackendInputs->begin());
-    assert(CompileJA && "Backend job is not preceeded by compile job.");
-    const Tool *Compiler = TC->SelectTool(*CompileJA);
-    if (!Compiler)
+    // has an integrated assembler. However, if OpenMP offloading is required
+    // the backend and compile jobs have to be kept separate and an integrated
+    // assembler of the backend job will be queried instead.
+    JobAction *CurJA = cast<BackendJobAction>(*Inputs->begin());
+    const ActionList *BackendInputs = &CurJA->getInputs();
+    CudaHostAction *CHA = nullptr;
+    if (!RequiresOpenMPOffloading(TC)) {
+      // Compile job may be wrapped in CudaHostAction, extract it if
+      // that's the case and update CollapsedCHA if we combine phases.
+      CHA = dyn_cast<CudaHostAction>(*CurJA->begin());
+      CurJA =
+          cast<CompileJobAction>(CHA ? *CHA->begin() : *BackendInputs->begin());
+      assert(CurJA && "Backend job is not preceeded by compile job.");
+    }
+    const Tool *CurTool = TC->SelectTool(*CurJA);
+    if (!CurTool)
       return nullptr;
-    if (Compiler->hasIntegratedAssembler()) {
-      Inputs = &CompileJA->getInputs();
-      ToolForJob = Compiler;
+    if (CurTool->hasIntegratedAssembler()) {
+      Inputs = &CurJA->getInputs();
+      ToolForJob = CurTool;
       CollapsedCHA = CHA;
     }
   }
 
   // A backend job should always be combined with the preceding compile job
   // unless OPT_save_temps is enabled and the compiler is capable of emitting
-  // LLVM IR as an intermediate output.
-  if (isa<BackendJobAction>(JA)) {
+  // LLVM IR as an intermediate output. The OpenMP offloading implementation
+  // also requires the Compile and Backend jobs to be separate.
+  if (isa<BackendJobAction>(JA) && !RequiresOpenMPOffloading(TC)) {
     // Check if the compiler supports emitting LLVM IR.
     assert(Inputs->size() == 1);
     // Compile job may be wrapped in CudaHostAction, extract it if
@@ -1769,23 +2081,57 @@
                                 const ToolChain *TC, const char *BoundArch,
                                 bool AtTopLevel, bool MultipleArchs,
                                 const char *LinkingOutput,
-                                InputInfo &Result) const {
+                                InputInfo &Result, 
+																OffloadingHostResultsTy &OffloadingHostResults) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
 
   InputInfoList CudaDeviceInputInfos;
   if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
     InputInfo II;
     // Append outputs of device jobs to the input list.
     for (const Action *DA : CHA->getDeviceActions()) {
       BuildJobsForAction(C, DA, TC, "", AtTopLevel,
-                         /*MultipleArchs*/ false, LinkingOutput, II);
+                         /*MultipleArchs*/ false, LinkingOutput, II,
+                         OffloadingHostResults);
       CudaDeviceInputInfos.push_back(II);
     }
     // Override current action with a real host compile action and continue
     // processing it.
     A = *CHA->begin();
   }
 
+  if (const OffloadUnbundlingJobAction *OUA =
+          dyn_cast<OffloadUnbundlingJobAction>(A)) {
+    // The input of the unbundling job has to be a single input non-source file,
+    // so we do not consider it having multiple architectures. We just use the
+    // naming that a regular host input file would have.
+    BuildJobsForAction(C, *OUA->begin(), TC, BoundArch, AtTopLevel,
+                       /*MultipleArchs=*/false, LinkingOutput, Result,
+                       OffloadingHostResults);
+    Result = CreateUnbundledOffloadingResult(C, OUA, TC, Result,
+                                             OffloadingHostResults);
+    return;
+  }
+
+  if (const OffloadBundlingJobAction *OBA =
+          dyn_cast<OffloadBundlingJobAction>(A)) {
+    // Compute the input action for all devices and emit a bundling command.
+    InputInfoList Results(OrderedOffloadingToolchains.size() + 1);
+    for (unsigned i = 0; i < Results.size(); ++i) {
+      const ToolChain *CurTC = i ? OrderedOffloadingToolchains[i - 1] : TC;
+      // The input job of the bundling action is meant for multiple targets and
+      // is not a top level job - the bundling job is the top level for the
+      // current output.
+      BuildJobsForAction(C, *OBA->begin(), CurTC,
+                         CreateOffloadingPseudoArchName(C, CurTC),
+                         /*AtTopLevel=*/false,
+                         /*MultipleArchs=*/true, LinkingOutput, Results[i],
+                         OffloadingHostResults);
+    }
+    Result = CreateBundledOffloadingResult(C, OBA, TC, Results);
+    return;
+  }
+
   if (const InputAction *IA = dyn_cast<InputAction>(A)) {
     // FIXME: It would be nice to not claim this here; maybe the old scheme of
     // just using Args was better?
@@ -1812,16 +2158,17 @@
       TC = &C.getDefaultToolChain();
 
     BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel,
-                       MultipleArchs, LinkingOutput, Result);
+                       MultipleArchs, LinkingOutput, Result,
+                       OffloadingHostResults);
     return;
   }
 
   if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
     BuildJobsForAction(
         C, *CDA->begin(),
         &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())),
         CDA->getGpuArchName(), CDA->isAtTopLevel(),
-        /*MultipleArchs*/ true, LinkingOutput, Result);
+        /*MultipleArchs*/ true, LinkingOutput, Result, OffloadingHostResults);
     return;
   }
 
@@ -1840,7 +2187,8 @@
     InputInfo II;
     for (const Action *DA : CollapsedCHA->getDeviceActions()) {
       BuildJobsForAction(C, DA, TC, "", AtTopLevel,
-                         /*MultipleArchs*/ false, LinkingOutput, II);
+                         /*MultipleArchs*/ false, LinkingOutput, II,
+                         OffloadingHostResults);
       CudaDeviceInputInfos.push_back(II);
     }
   }
@@ -1857,7 +2205,7 @@
 
     InputInfo II;
     BuildJobsForAction(C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs,
-                       LinkingOutput, II);
+                       LinkingOutput, II, OffloadingHostResults);
     InputInfos.push_back(II);
   }
 
@@ -1874,26 +2222,19 @@
     InputInfos.append(CudaDeviceInputInfos.begin(), CudaDeviceInputInfos.end());
 
   // Determine the place to write output to, if any.
-  if (JA->getType() == types::TY_Nothing)
-    Result = InputInfo(A->getType(), BaseInput);
-  else
-    Result = InputInfo(GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
-                                          AtTopLevel, MultipleArchs),
-                       A->getType(), BaseInput);
+  Result =
+      CreateActionResult(C, A, BaseInput, BoundArch, AtTopLevel, MultipleArchs);
 
-  if (CCCPrintBindings && !CCGenDiagnostics) {
-    llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"'
-                 << " - \"" << T->getName() << "\", inputs: [";
-    for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) {
-      llvm::errs() << InputInfos[i].getAsString();
-      if (i + 1 != e)
-        llvm::errs() << ", ";
-    }
-    llvm::errs() << "], output: " << Result.getAsString() << "\n";
-  } else {
+  // Post-process inputs and results to suit the needs of the offloading
+  // implementations.
+  PostProcessOffloadingInputsAndResults(C, JA, TC, InputInfos, Result,
+                                        OffloadingHostResults);
+
+  if (CCCPrintBindings && !CCGenDiagnostics)
+    DumpJobBindings(&T->getToolChain(), T->getName(), InputInfos, Result);
+  else
     T->ConstructJob(C, *JA, Result, InputInfos,
                     C.getArgsForToolChain(TC, BoundArch), LinkingOutput);
-  }
 }
 
 const char *Driver::getDefaultImageName() const {
@@ -2188,10 +2529,14 @@
   return Path.str();
 }
 
-const ToolChain &Driver::getToolChain(const ArgList &Args,
-                                      const llvm::Triple &Target) const {
-
-  ToolChain *&TC = ToolChains[Target.str()];
+const ToolChain &
+Driver::getToolChain(const ArgList &Args, const llvm::Triple &Target,
+                     ToolChain::OffloadingKind OffloadingKind) const {
+  // If this is an offload toolchain we need to try to get it from the right
+  // cache.
+  bool IsOffloadingDevice = (OffloadingKind == ToolChain::OK_OpenMP_Device);
+  ToolChain *&TC = *((IsOffloadingDevice) ? &OffloadToolChains[Target.str()]
+                                          : &ToolChains[Target.str()]);
   if (!TC) {
     switch (Target.getOS()) {
     case llvm::Triple::CloudABI:
@@ -2289,6 +2634,8 @@
       }
     }
   }
+  // Set the offloading kind for this toolchain.
+  TC->setOffloadingKind(OffloadingKind);
   return *TC;
 }
 
Index: lib/Driver/Compilation.cpp
===================================================================
--- lib/Driver/Compilation.cpp
+++ lib/Driver/Compilation.cpp
@@ -59,9 +59,16 @@
 
   DerivedArgList *&Entry = TCArgs[std::make_pair(TC, BoundArch)];
   if (!Entry) {
-    Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch);
-    if (!Entry)
-      Entry = TranslatedArgs;
+    DerivedArgList *DefaultArgs = TC->TranslateArgs(*TranslatedArgs, BoundArch);
+    Entry = (DefaultArgs) ? DefaultArgs : TranslatedArgs;
+
+    // Check if there is any offloading specific translation to do.
+    DerivedArgList *OffloadArgs = TC->TranslateOffloadArgs(*Entry, BoundArch);
+    if (OffloadArgs) {
+      // There are offloading translated args, so we have to use them instead.
+      delete DefaultArgs;
+      Entry = OffloadArgs;
+    }
   }
 
   return *Entry;
Index: lib/Driver/Action.cpp
===================================================================
--- lib/Driver/Action.cpp
+++ lib/Driver/Action.cpp
@@ -26,6 +26,10 @@
   case BindArchClass: return "bind-arch";
   case CudaDeviceClass: return "cuda-device";
   case CudaHostClass: return "cuda-host";
+  case OffloadBundlingJobClass:
+    return "clang-offload-bundler";
+  case OffloadUnbundlingJobClass:
+    return "clang-offload-unbundler";
   case PreprocessJobClass: return "preprocessor";
   case PrecompileJobClass: return "precompiler";
   case AnalyzeJobClass: return "analyzer";
@@ -78,14 +82,29 @@
 
 void JobAction::anchor() {}
 
+JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input)
+    : Action(Kind, std::move(Input)) {}
+
 JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input,
                      types::ID Type)
     : Action(Kind, std::move(Input), Type) {}
 
 JobAction::JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type)
   : Action(Kind, Inputs, Type) {
 }
 
+void OffloadBundlingJobAction::anchor() {}
+
+OffloadBundlingJobAction::OffloadBundlingJobAction(
+    std::unique_ptr<Action> Input)
+    : JobAction(OffloadBundlingJobClass, std::move(Input)) {}
+
+void OffloadUnbundlingJobAction::anchor() {}
+
+OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(
+    std::unique_ptr<Action> Input)
+    : JobAction(OffloadUnbundlingJobClass, std::move(Input)) {}
+
 void PreprocessJobAction::anchor() {}
 
 PreprocessJobAction::PreprocessJobAction(std::unique_ptr<Action> Input,
Index: include/clang/Driver/Types.h
===================================================================
--- include/clang/Driver/Types.h
+++ include/clang/Driver/Types.h
@@ -69,6 +69,11 @@
   /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and headers).
   bool isObjC(ID Id);
 
+  /// isSrcFile - Is this a source file, i.e. something that still has to be
+  /// preprocessed? The logic is the same as the one that decides whether the
+  /// first compilation phase is a preprocessing one.
+  bool isSrcFile(ID Id);
+
   /// lookupTypeForExtension - Lookup the type to use for the file
   /// extension \p Ext.
   ID lookupTypeForExtension(const char *Ext);
Index: include/clang/Driver/ToolChain.h
===================================================================
--- include/clang/Driver/ToolChain.h
+++ include/clang/Driver/ToolChain.h
@@ -64,13 +64,20 @@
     RM_DisabledImplicitly
   };
 
+  enum OffloadingKind {
+    OK_None,
+    OK_OpenMP_Host,
+    OK_OpenMP_Device,
+  };
+
 private:
   const Driver &D;
   const llvm::Triple Triple;
   const llvm::opt::ArgList &Args;
   // We need to initialize CachedRTTIArg before CachedRTTIMode
   const llvm::opt::Arg *const CachedRTTIArg;
   const RTTIMode CachedRTTIMode;
+  OffloadingKind CachedOffloadingKind;
 
   /// The list of toolchain specific path prefixes to search for
   /// files.
@@ -83,10 +90,12 @@
   mutable std::unique_ptr<Tool> Clang;
   mutable std::unique_ptr<Tool> Assemble;
   mutable std::unique_ptr<Tool> Link;
+  mutable std::unique_ptr<Tool> OffloadBundler;
   Tool *getClang() const;
   Tool *getAssemble() const;
   Tool *getLink() const;
   Tool *getClangAs() const;
+  Tool *getOffloadBundler() const;
 
   mutable std::unique_ptr<SanitizerArgs> SanitizerArguments;
 
@@ -126,6 +135,9 @@
   vfs::FileSystem &getVFS() const;
   const llvm::Triple &getTriple() const { return Triple; }
 
+  OffloadingKind getOffloadingKind() const { return CachedOffloadingKind; }
+  void setOffloadingKind(OffloadingKind OT);
+
   llvm::Triple::ArchType getArch() const { return Triple.getArch(); }
   StringRef getArchName() const { return Triple.getArchName(); }
   StringRef getPlatform() const { return Triple.getVendorName(); }
@@ -182,6 +194,18 @@
     return nullptr;
   }
 
+  /// TranslateOffloadArgs - Create a new derived argument list for any argument
+  /// translations this ToolChain may wish to perform if supporting offloading,
+  /// or 0 if no tool chain specific translations are needed. If this tool chain
+  /// does not refer to an offloading tool chain 0 is returned too.
+  ///
+  /// \param BoundArch - The bound architecture name, or 0.
+  virtual llvm::opt::DerivedArgList *
+  TranslateOffloadArgs(const llvm::opt::DerivedArgList &Args,
+                       const char *BoundArch) const {
+    return nullptr;
+  }
+
   /// Choose a tool to use to handle the action \p JA.
   ///
   /// This can be overridden when a particular ToolChain needs to use
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1580,6 +1580,8 @@
 def object : Flag<["-"], "object">;
 def o : JoinedOrSeparate<["-"], "o">, Flags<[DriverOption, RenderAsInput, CC1Option, CC1AsOption]>,
   HelpText<"Write output to <file>">, MetaVarName<"<file>">;
+def omptargets_EQ : CommaJoined<["-"], "omptargets=">, Flags<[DriverOption, CC1Option]>,
+  HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">;
 def pagezero__size : JoinedOrSeparate<["-"], "pagezero_size">;
 def pass_exit_codes : Flag<["-", "--"], "pass-exit-codes">, Flags<[Unsupported]>;
 def pedantic_errors : Flag<["-", "--"], "pedantic-errors">, Group<pedantic_Group>, Flags<[CC1Option]>;
Index: include/clang/Driver/Driver.h
===================================================================
--- include/clang/Driver/Driver.h
+++ include/clang/Driver/Driver.h
@@ -14,6 +14,8 @@
 #include "clang/Basic/LLVM.h"
 #include "clang/Driver/Phases.h"
 #include "clang/Driver/Types.h"
+#include "clang/Driver/Tool.h"
+#include "clang/Driver/ToolChain.h"
 #include "clang/Driver/Util.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -46,11 +48,10 @@
   class Action;
   class Command;
   class Compilation;
-  class InputInfo;
+  class InputAction;
   class JobList;
   class JobAction;
   class SanitizerArgs;
-  class ToolChain;
 
 /// Driver - Encapsulate logic for constructing compilation processes
 /// from a set of gcc-driver-like command line arguments.
@@ -190,7 +191,49 @@
   /// stored in it, and will clean them up when torn down.
   mutable llvm::StringMap<ToolChain *> ToolChains;
 
+  /// \brief Cache of all the offloading ToolChains in use by the driver.
+  ///
+  /// This maps from the string representation of a triple that refers to an
+  /// offloading target to a ToolChain created targeting that triple. The driver
+  /// owns all the ToolChain objects stored in it, and will clean them up when
+  /// torn down. We use a different cache for offloading as it is possible to
+  /// have offloading toolchains with the same triple the host has, and the
+  /// implementation has to differentiate the two in order to adjust the
+  /// commands for offloading.
+  mutable llvm::StringMap<ToolChain *> OffloadToolChains;
+
+  /// \brief Array of the toolchains of offloading targets in the order they
+  /// were requested by the user.
+  SmallVector<const ToolChain *, 4> OrderedOffloadingToolchains;
+
+  /// \brief Type for the cache of the results for the offloading host emitted
+  /// so far. The host results can be required by the device tools.
+  typedef llvm::DenseMap<const Action *, InputInfoList> OffloadingHostResultsTy;
+
 private:
+  /// CreateUnbundledOffloadingResult - Create a command to unbundle the input
+  /// and use the resulting input info. If there are inputs already cached in
+  /// OffloadingHostResults for that action use them instead. If offloading
+  /// is not supported, just return the provided input info.
+  InputInfo CreateUnbundledOffloadingResult(
+      Compilation &C, const OffloadUnbundlingJobAction *CurAction,
+      const ToolChain *TC, InputInfo Result,
+      OffloadingHostResultsTy &OffloadingHostResults) const;
+
+  /// CreateBundledOffloadingResult - Create a bundle of all provided results
+  /// and return the InputInfo of the bundled file.
+  InputInfo CreateBundledOffloadingResult(
+      Compilation &C, const OffloadBundlingJobAction *CurAction,
+      const ToolChain *TC, InputInfoList Results) const;
+
+  /// PostProcessOffloadingInputsAndResults - Update the input and output
+  /// information to suit the needs of the offloading implementation. This is
+  /// used, e.g., to pass extra results from host to device side and vice-versa.
+  void PostProcessOffloadingInputsAndResults(
+      Compilation &C, const JobAction *JA, const ToolChain *TC,
+      InputInfoList &Inputs, InputInfo &Result,
+      OffloadingHostResultsTy &OffloadingHostResults) const;
+
   /// TranslateInputArgs - Create a new derived argument list from the input
   /// arguments, after applying the standard argument translations.
   llvm::opt::DerivedArgList *
@@ -378,7 +421,8 @@
                           bool AtTopLevel,
                           bool MultipleArchs,
                           const char *LinkingOutput,
-                          InputInfo &Result) const;
+                          InputInfo &Result,
+                          OffloadingHostResultsTy &OffloadingHostResults) const;
 
   /// Returns the default name for linked images (e.g., "a.out").
   const char *getDefaultImageName() const;
@@ -417,9 +461,11 @@
   /// \brief Retrieves a ToolChain for a particular \p Target triple.
   ///
   /// Will cache ToolChains for the life of the driver object, and create them
-  /// on-demand.
-  const ToolChain &getToolChain(const llvm::opt::ArgList &Args,
-                                const llvm::Triple &Target) const;
+  /// on-demand. \a OffloadingKind specifies if the toolchain being created
+  /// refers to any kind of offloading (e.g. OpenMP).
+  const ToolChain &getToolChain(
+      const llvm::opt::ArgList &Args, const llvm::Triple &Target,
+      ToolChain::OffloadingKind OffloadingKind = ToolChain::OK_None) const;
 
   /// @}
 
Index: include/clang/Driver/CC1Options.td
===================================================================
--- include/clang/Driver/CC1Options.td
+++ include/clang/Driver/CC1Options.td
@@ -665,7 +665,16 @@
   HelpText<"Enable function overloads based on CUDA target attributes.">;
 def fcuda_uses_libdevice : Flag<["-"], "fcuda-uses-libdevice">,
   HelpText<"Selectively link and internalize bitcode.">;
+  
+//===----------------------------------------------------------------------===//
+// OpenMP Options
+//===----------------------------------------------------------------------===//
 
+def fopenmp_is_device : Flag<["-"], "fopenmp-is-device">,
+  HelpText<"Generate code only for an OpenMP target device.">;
+def omp_host_ir_file_path : Separate<["-"], "omp-host-ir-file-path">,
+  HelpText<"Path to the IR file produced by the frontend for the host.">;
+  
 } // let Flags = [CC1Option]
 
 
Index: include/clang/Driver/Action.h
===================================================================
--- include/clang/Driver/Action.h
+++ include/clang/Driver/Action.h
@@ -55,9 +55,11 @@
     DsymutilJobClass,
     VerifyDebugInfoJobClass,
     VerifyPCHJobClass,
+    OffloadBundlingJobClass,
+    OffloadUnbundlingJobClass,
 
-    JobClassFirst=PreprocessJobClass,
-    JobClassLast=VerifyPCHJobClass
+    JobClassFirst = PreprocessJobClass,
+    JobClassLast = OffloadUnbundlingJobClass
   };
 
   static const char *getClassName(ActionClass AC);
@@ -177,6 +179,7 @@
 class JobAction : public Action {
   virtual void anchor();
 protected:
+  JobAction(ActionClass Kind, std::unique_ptr<Action> Input);
   JobAction(ActionClass Kind, std::unique_ptr<Action> Input, types::ID Type);
   JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type);
 
@@ -187,6 +190,30 @@
   }
 };
 
+class OffloadBundlingJobAction : public JobAction {
+  void anchor() override;
+
+public:
+  // Offloading bundling doesn't change the type of output.
+  OffloadBundlingJobAction(std::unique_ptr<Action> Input);
+
+  static bool classof(const Action *A) {
+    return A->getKind() == OffloadBundlingJobClass;
+  }
+};
+
+class OffloadUnbundlingJobAction : public JobAction {
+  void anchor() override;
+
+public:
+  // Offloading unbundling doesn't change the type of output.
+  OffloadUnbundlingJobAction(std::unique_ptr<Action> Input);
+
+  static bool classof(const Action *A) {
+    return A->getKind() == OffloadUnbundlingJobClass;
+  }
+};
+
 class PreprocessJobAction : public JobAction {
   void anchor() override;
 public:
Index: include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- include/clang/Basic/DiagnosticDriverKinds.td
+++ include/clang/Basic/DiagnosticDriverKinds.td
@@ -115,6 +115,8 @@
 def err_drv_optimization_remark_pattern : Error<
   "%0 in '%1'">;
 def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">;
+def err_drv_invalid_omp_target : Error<
+  "OpenMP target is invalid: '%0'">;
 
 def warn_O4_is_O3 : Warning<"-O4 is equivalent to -O3">, InGroup<Deprecated>;
 def warn_drv_lto_libpath : Warning<"libLTO.dylib relative to clang installed dir not found; using 'ld' default search path instead">,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to