jhuber6 updated this revision to Diff 414371.
jhuber6 added a comment.
Herald added subscribers: abrachet, phosek.

Fix architecture parsing and still include the GPU binary so cuobjdump can use
it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===================================================================
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:        -foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -71,8 +71,8 @@
   if (Args.hasArg(options::OPT_static))
     if (const Arg *A =
             Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
-      D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
-                                                      << "-static";
+      D.Diag(diag::err_drv_argument_not_allowed_with)
+          << A->getAsString(Args) << "-static";
 }
 
 // Add backslashes to escape spaces and other backslashes.
@@ -157,8 +157,8 @@
 /// parameter in reciprocal argument strings. Return false if there is an error
 /// parsing the refinement step. Otherwise, return true and set the Position
 /// of the refinement step in the input string.
-static bool getRefinementStep(StringRef In, const Driver &D,
-                              const Arg &A, size_t &Position) {
+static bool getRefinementStep(StringRef In, const Driver &D, const Arg &A,
+                              size_t &Position) {
   const char RefinementStepToken = ':';
   Position = In.find(RefinementStepToken);
   if (Position != StringRef::npos) {
@@ -510,7 +510,7 @@
 }
 
 static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
-  switch (Triple.getArch()){
+  switch (Triple.getArch()) {
   default:
     return false;
   case llvm::Triple::arm:
@@ -705,7 +705,7 @@
 
 /// Add a CC1 and CC1AS option to specify the coverage file path prefix map.
 static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
-                                   ArgStringList &CmdArgs) {
+                                    ArgStringList &CmdArgs) {
   for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
                                     options::OPT_fcoverage_prefix_map_EQ)) {
     StringRef Map = A->getValue();
@@ -801,13 +801,12 @@
       CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
     CSPGOGenerateArg = nullptr;
 
-  auto *ProfileGenerateArg = Args.getLastArg(
-      options::OPT_fprofile_instr_generate,
-      options::OPT_fprofile_instr_generate_EQ,
-      options::OPT_fno_profile_instr_generate);
-  if (ProfileGenerateArg &&
-      ProfileGenerateArg->getOption().matches(
-          options::OPT_fno_profile_instr_generate))
+  auto *ProfileGenerateArg =
+      Args.getLastArg(options::OPT_fprofile_instr_generate,
+                      options::OPT_fprofile_instr_generate_EQ,
+                      options::OPT_fno_profile_instr_generate);
+  if (ProfileGenerateArg && ProfileGenerateArg->getOption().matches(
+                                options::OPT_fno_profile_instr_generate))
     ProfileGenerateArg = nullptr;
 
   if (PGOGenerateArg && ProfileGenerateArg)
@@ -1334,8 +1333,8 @@
       }
 
       if (ThroughHeader.empty()) {
-        CmdArgs.push_back(Args.MakeArgString(
-            Twine("-pch-through-hdrstop-") + (YcArg ? "create" : "use")));
+        CmdArgs.push_back(Args.MakeArgString(Twine("-pch-through-hdrstop-") +
+                                             (YcArg ? "create" : "use")));
       } else {
         CmdArgs.push_back(
             Args.MakeArgString(Twine("-pch-through-header=") + ThroughHeader));
@@ -1374,8 +1373,8 @@
           continue;
         } else {
           // Ignore the PCH if not first on command line and emit warning.
-          D.Diag(diag::warn_drv_pch_not_first_include) << P
-                                                       << A->getAsString(Args);
+          D.Diag(diag::warn_drv_pch_not_first_include)
+              << P << A->getAsString(Args);
         }
       }
     } else if (A->getOption().matches(options::OPT_isystem_after)) {
@@ -1552,8 +1551,9 @@
       if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
         F = FinalOutput->getValue();
     } else {
-      if (Format != "yaml" && // For YAML, keep the original behavior.
-          Triple.isOSDarwin() && // Enable this only on darwin, since it's the only platform supporting .dSYM bundles.
+      if (Format != "yaml" &&    // For YAML, keep the original behavior.
+          Triple.isOSDarwin() && // Enable this only on darwin, since it's the
+                                 // only platform supporting .dSYM bundles.
           Output.isFilename())
         F = Output.getFilename();
     }
@@ -1647,7 +1647,7 @@
       StringRef(*StrictAlignIter) == "+strict-align")
     CmdArgs.push_back("-Wunaligned-access");
 }
-}
+} // namespace
 
 static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args,
                                     ArgStringList &CmdArgs, bool isAArch64) {
@@ -1842,7 +1842,7 @@
   CmdArgs.push_back("-target-abi");
   CmdArgs.push_back(ABIName);
 }
-}
+} // namespace
 
 void Clang::AddAArch64TargetArgs(const ArgList &Args,
                                  ArgStringList &CmdArgs) const {
@@ -1884,17 +1884,19 @@
       if (Val.endswith("+"))
         Val = Val.substr(0, Val.size() - 1);
       else {
-        bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid;
+        bool Invalid = Val.getAsInteger(10, Bits);
+        (void)Invalid;
         assert(!Invalid && "Failed to parse value");
         CmdArgs.push_back(
             Args.MakeArgString("-mvscale-max=" + llvm::Twine(Bits / 128)));
       }
 
-      bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid;
+      bool Invalid = Val.getAsInteger(10, Bits);
+      (void)Invalid;
       assert(!Invalid && "Failed to parse value");
       CmdArgs.push_back(
           Args.MakeArgString("-mvscale-min=" + llvm::Twine(Bits / 128)));
-    // Silently drop requests for vector-length agnostic code as it's implied.
+      // Silently drop requests for vector-length agnostic code as it's implied.
     } else if (!Val.equals("scalable"))
       // Handle the unsupported values passed to msve-vector-bits.
       D.Diag(diag::err_drv_unsupported_option_argument)
@@ -2077,8 +2079,8 @@
   if (T.isOSBinFormatELF()) {
     switch (getToolChain().getArch()) {
     case llvm::Triple::ppc64: {
-      if ((T.isOSFreeBSD() && T.getOSMajorVersion() >= 13) ||
-          T.isOSOpenBSD() || T.isMusl())
+      if ((T.isOSFreeBSD() && T.getOSMajorVersion() >= 13) || T.isOSOpenBSD() ||
+          T.isMusl())
         ABIName = "elfv2";
       else
         ABIName = "elfv1";
@@ -2100,9 +2102,9 @@
     else if (V == "ibmlongdouble")
       IEEELongDouble = false;
     else if (V != "altivec")
-      // The ppc64 linux abis are all "altivec" abis by default. Accept and ignore
-      // the option if given as we don't have backend support for any targets
-      // that don't use the altivec abi.
+      // The ppc64 linux abis are all "altivec" abis by default. Accept and
+      // ignore the option if given as we don't have backend support for any
+      // targets that don't use the altivec abi.
       ABIName = A->getValue();
   }
   if (IEEELongDouble)
@@ -2204,8 +2206,8 @@
 
 void Clang::AddSystemZTargetArgs(const ArgList &Args,
                                  ArgStringList &CmdArgs) const {
-  bool HasBackchain = Args.hasFlag(options::OPT_mbackchain,
-                                   options::OPT_mno_backchain, false);
+  bool HasBackchain =
+      Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false);
   bool HasPackedStack = Args.hasFlag(options::OPT_mpacked_stack,
                                      options::OPT_mno_packed_stack, false);
   systemz::FloatABI FloatABI =
@@ -2214,7 +2216,7 @@
   if (HasBackchain && HasPackedStack && !HasSoftFloat) {
     const Driver &D = getToolChain().getDriver();
     D.Diag(diag::err_drv_unsupported_opt)
-      << "-mpacked-stack -mbackchain -mhard-float";
+        << "-mpacked-stack -mbackchain -mhard-float";
   }
   if (HasBackchain)
     CmdArgs.push_back("-mbackchain");
@@ -2371,7 +2373,8 @@
 
 void Clang::DumpCompilationDatabase(Compilation &C, StringRef Filename,
                                     StringRef Target, const InputInfo &Output,
-                                    const InputInfo &Input, const ArgList &Args) const {
+                                    const InputInfo &Input,
+                                    const ArgList &Args) const {
   // If this is a dry run, do not create the compilation database file.
   if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
     return;
@@ -2384,8 +2387,8 @@
     auto File = std::make_unique<llvm::raw_fd_ostream>(
         Filename, EC, llvm::sys::fs::OF_TextWithCRLF);
     if (EC) {
-      D.Diag(clang::diag::err_drv_compilationdatabase) << Filename
-                                                       << EC.message();
+      D.Diag(clang::diag::err_drv_compilationdatabase)
+          << Filename << EC.message();
       return;
     }
     CompilationDatabase = std::move(File);
@@ -2408,7 +2411,7 @@
     CDB << ", \"" << escape(Buf) << "\"";
   }
   CDB << ", \"" << escape(Input.getFilename()) << "\"";
-  for (auto &A: Args) {
+  for (auto &A : Args) {
     auto &O = A->getOption();
     // Skip language selection, which is positional.
     if (O.getID() == options::OPT_x)
@@ -2424,7 +2427,7 @@
     // All other arguments are quoted and appended.
     ArgStringList ASL;
     A->render(Args, ASL);
-    for (auto &it: ASL)
+    for (auto &it : ASL)
       CDB << ", \"" << escape(it) << "\"";
   }
   Buf = "--target=";
@@ -2646,26 +2649,26 @@
                  Value.startswith("-mhwdiv") || Value.startswith("-march")) {
         // Do nothing, we'll validate it later.
       } else if (Value == "-defsym") {
-          if (A->getNumValues() != 2) {
-            D.Diag(diag::err_drv_defsym_invalid_format) << Value;
-            break;
-          }
-          const char *S = A->getValue(1);
-          auto Pair = StringRef(S).split('=');
-          auto Sym = Pair.first;
-          auto SVal = Pair.second;
-
-          if (Sym.empty() || SVal.empty()) {
-            D.Diag(diag::err_drv_defsym_invalid_format) << S;
-            break;
-          }
-          int64_t IVal;
-          if (SVal.getAsInteger(0, IVal)) {
-            D.Diag(diag::err_drv_defsym_invalid_symval) << SVal;
-            break;
-          }
-          CmdArgs.push_back(Value.data());
-          TakeNextArg = true;
+        if (A->getNumValues() != 2) {
+          D.Diag(diag::err_drv_defsym_invalid_format) << Value;
+          break;
+        }
+        const char *S = A->getValue(1);
+        auto Pair = StringRef(S).split('=');
+        auto Sym = Pair.first;
+        auto SVal = Pair.second;
+
+        if (Sym.empty() || SVal.empty()) {
+          D.Diag(diag::err_drv_defsym_invalid_format) << S;
+          break;
+        }
+        int64_t IVal;
+        if (SVal.getAsInteger(0, IVal)) {
+          D.Diag(diag::err_drv_defsym_invalid_symval) << SVal;
+          break;
+        }
+        CmdArgs.push_back(Value.data());
+        TakeNextArg = true;
       } else if (Value == "-fdebug-compilation-dir") {
         CmdArgs.push_back("-fdebug-compilation-dir");
         TakeNextArg = true;
@@ -2717,7 +2720,7 @@
   bool AssociativeMath = false;
   bool ReciprocalMath = false;
   bool SignedZeros = true;
-  bool TrappingMath = false; // Implemented via -ffp-exception-behavior
+  bool TrappingMath = false;        // Implemented via -ffp-exception-behavior
   bool TrappingMathPresent = false; // Is trapping-math in args, and not
                                     // overriden by ffp-exception-behavior?
   bool RoundingFPMath = false;
@@ -2773,10 +2776,9 @@
 
       StringRef Val = A->getValue();
       if (OFastEnabled && !Val.equals("fast")) {
-          // Only -ffp-model=fast is compatible with OFast, ignore.
+        // Only -ffp-model=fast is compatible with OFast, ignore.
         D.Diag(clang::diag::warn_drv_overriding_flag_option)
-          << Args.MakeArgString("-ffp-model=" + Val)
-          << "-Ofast";
+            << Args.MakeArgString("-ffp-model=" + Val) << "-Ofast";
         break;
       }
       StrictFPModel = false;
@@ -2808,35 +2810,65 @@
         D.Diag(diag::err_drv_unsupported_option_argument)
             << A->getOption().getName() << Val;
       break;
-      }
+    }
     }
 
     switch (optID) {
     // If this isn't an FP option skip the claim below
-    default: continue;
+    default:
+      continue;
 
     // Options controlling individual features
-    case options::OPT_fhonor_infinities:    HonorINFs = true;         break;
-    case options::OPT_fno_honor_infinities: HonorINFs = false;        break;
-    case options::OPT_fhonor_nans:          HonorNaNs = true;         break;
-    case options::OPT_fno_honor_nans:       HonorNaNs = false;        break;
-    case options::OPT_fapprox_func:         ApproxFunc = true;        break;
-    case options::OPT_fno_approx_func:      ApproxFunc = false;       break;
-    case options::OPT_fmath_errno:          MathErrno = true;         break;
-    case options::OPT_fno_math_errno:       MathErrno = false;        break;
-    case options::OPT_fassociative_math:    AssociativeMath = true;   break;
-    case options::OPT_fno_associative_math: AssociativeMath = false;  break;
-    case options::OPT_freciprocal_math:     ReciprocalMath = true;    break;
-    case options::OPT_fno_reciprocal_math:  ReciprocalMath = false;   break;
-    case options::OPT_fsigned_zeros:        SignedZeros = true;       break;
-    case options::OPT_fno_signed_zeros:     SignedZeros = false;      break;
+    case options::OPT_fhonor_infinities:
+      HonorINFs = true;
+      break;
+    case options::OPT_fno_honor_infinities:
+      HonorINFs = false;
+      break;
+    case options::OPT_fhonor_nans:
+      HonorNaNs = true;
+      break;
+    case options::OPT_fno_honor_nans:
+      HonorNaNs = false;
+      break;
+    case options::OPT_fapprox_func:
+      ApproxFunc = true;
+      break;
+    case options::OPT_fno_approx_func:
+      ApproxFunc = false;
+      break;
+    case options::OPT_fmath_errno:
+      MathErrno = true;
+      break;
+    case options::OPT_fno_math_errno:
+      MathErrno = false;
+      break;
+    case options::OPT_fassociative_math:
+      AssociativeMath = true;
+      break;
+    case options::OPT_fno_associative_math:
+      AssociativeMath = false;
+      break;
+    case options::OPT_freciprocal_math:
+      ReciprocalMath = true;
+      break;
+    case options::OPT_fno_reciprocal_math:
+      ReciprocalMath = false;
+      break;
+    case options::OPT_fsigned_zeros:
+      SignedZeros = true;
+      break;
+    case options::OPT_fno_signed_zeros:
+      SignedZeros = false;
+      break;
     case options::OPT_ftrapping_math:
       if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
           !FPExceptionBehavior.equals("strict"))
         // Warn that previous value of option is overridden.
         D.Diag(clang::diag::warn_drv_overriding_flag_option)
-          << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
-          << "-ftrapping-math";
+            << Args.MakeArgString("-ffp-exception-behavior=" +
+                                  FPExceptionBehavior)
+            << "-ftrapping-math";
       TrappingMath = true;
       TrappingMathPresent = true;
       FPExceptionBehavior = "strict";
@@ -2846,8 +2878,9 @@
           !FPExceptionBehavior.equals("ignore"))
         // Warn that previous value of option is overridden.
         D.Diag(clang::diag::warn_drv_overriding_flag_option)
-          << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
-          << "-fno-trapping-math";
+            << Args.MakeArgString("-ffp-exception-behavior=" +
+                                  FPExceptionBehavior)
+            << "-fno-trapping-math";
       TrappingMath = false;
       TrappingMathPresent = true;
       FPExceptionBehavior = "ignore";
@@ -2891,7 +2924,7 @@
         FPContract = Val;
       else
         D.Diag(diag::err_drv_unsupported_option_argument)
-           << A->getOption().getName() << Val;
+            << A->getOption().getName() << Val;
       break;
     }
 
@@ -2909,8 +2942,9 @@
           !FPExceptionBehavior.equals(Val))
         // Warn that previous value of option is overridden.
         D.Diag(clang::diag::warn_drv_overriding_flag_option)
-          << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
-          << Args.MakeArgString("-ffp-exception-behavior=" + Val);
+            << Args.MakeArgString("-ffp-exception-behavior=" +
+                                  FPExceptionBehavior)
+            << Args.MakeArgString("-ffp-exception-behavior=" + Val);
       TrappingMath = TrappingMathPresent = false;
       if (Val.equals("ignore") || Val.equals("maytrap"))
         FPExceptionBehavior = Val;
@@ -3021,9 +3055,10 @@
         StrictFPModel = false;
         FPModel = "";
         D.Diag(clang::diag::warn_drv_overriding_flag_option)
-            << "-ffp-model=strict" <<
-            ((A->getNumValues() == 0) ?  A->getSpelling()
-            : Args.MakeArgString(A->getSpelling() + A->getValue()));
+            << "-ffp-model=strict"
+            << ((A->getNumValues() == 0)
+                    ? A->getSpelling()
+                    : Args.MakeArgString(A->getSpelling() + A->getValue()));
       }
     }
 
@@ -3087,8 +3122,8 @@
     CmdArgs.push_back(Args.MakeArgString("-frounding-math"));
 
   if (!FPExceptionBehavior.empty())
-    CmdArgs.push_back(Args.MakeArgString("-ffp-exception-behavior=" +
-                      FPExceptionBehavior));
+    CmdArgs.push_back(
+        Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior));
 
   if (!FPEvalMethod.empty())
     CmdArgs.push_back(Args.MakeArgString("-ffp-eval-method=" + FPEvalMethod));
@@ -3110,8 +3145,8 @@
         CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
       else
         D.Diag(clang::diag::warn_drv_overriding_flag_option)
-          << "-ffp-model=fast"
-          << Args.MakeArgString("-ffp-contract=" + FPContract);
+            << "-ffp-model=fast"
+            << Args.MakeArgString("-ffp-contract=" + FPContract);
     }
   }
 
@@ -3166,8 +3201,7 @@
       CmdArgs.push_back("-analyzer-checker=osx");
       CmdArgs.push_back(
           "-analyzer-checker=security.insecureAPI.decodeValueOfObjCType");
-    }
-    else if (Triple.isOSFuchsia())
+    } else if (Triple.isOSFuchsia())
       CmdArgs.push_back("-analyzer-checker=fuchsia");
 
     CmdArgs.push_back("-analyzer-checker=deadcode");
@@ -3176,7 +3210,8 @@
       CmdArgs.push_back("-analyzer-checker=cplusplus");
 
     if (!Triple.isPS4()) {
-      CmdArgs.push_back("-analyzer-checker=security.insecureAPI.UncheckedReturn");
+      CmdArgs.push_back(
+          "-analyzer-checker=security.insecureAPI.UncheckedReturn");
       CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw");
       CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets");
       CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp");
@@ -3391,7 +3426,8 @@
     }
 
   if (!TrivialAutoVarInit.empty()) {
-    if (TrivialAutoVarInit == "zero" && !Args.hasArg(options::OPT_enable_trivial_var_init_zero))
+    if (TrivialAutoVarInit == "zero" &&
+        !Args.hasArg(options::OPT_enable_trivial_var_init_zero))
       D.Diag(diag::err_drv_trivial_auto_var_init_zero_disabled);
     CmdArgs.push_back(
         Args.MakeArgString("-ftrivial-auto-var-init=" + TrivialAutoVarInit));
@@ -3428,8 +3464,7 @@
       options::OPT_cl_mad_enable,
       options::OPT_cl_no_signed_zeros,
       options::OPT_cl_fp32_correctly_rounded_divide_sqrt,
-      options::OPT_cl_uniform_work_group_size
-  };
+      options::OPT_cl_uniform_work_group_size};
 
   if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
     std::string CLStdStr = std::string("-cl-std=") + A->getValue();
@@ -3458,7 +3493,8 @@
                                        options::OPT_ccc_arcmt_migrate)) {
       ARCMTEnabled = true;
       switch (A->getOption().getID()) {
-      default: llvm_unreachable("missed a case");
+      default:
+        llvm_unreachable("missed a case");
       case options::OPT_ccc_arcmt_check:
         CmdArgs.push_back("-arcmt-action=check");
         break;
@@ -3744,10 +3780,9 @@
 static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T,
                                    ArgStringList &CmdArgs) {
   // -fsigned-char is default.
-  if (const Arg *A = Args.getLastArg(options::OPT_fsigned_char,
-                                     options::OPT_fno_signed_char,
-                                     options::OPT_funsigned_char,
-                                     options::OPT_fno_unsigned_char)) {
+  if (const Arg *A = Args.getLastArg(
+          options::OPT_fsigned_char, options::OPT_fno_signed_char,
+          options::OPT_funsigned_char, options::OPT_fno_unsigned_char)) {
     if (A->getOption().matches(options::OPT_funsigned_char) ||
         A->getOption().matches(options::OPT_fno_signed_char)) {
       CmdArgs.push_back("-fno-signed-char");
@@ -3840,9 +3875,8 @@
     auto *Arg = Args.getLastArg(
         options::OPT_fobjc_convert_messages_to_runtime_calls,
         options::OPT_fno_objc_convert_messages_to_runtime_calls);
-    if (Arg &&
-        Arg->getOption().matches(
-            options::OPT_fno_objc_convert_messages_to_runtime_calls))
+    if (Arg && Arg->getOption().matches(
+                   options::OPT_fno_objc_convert_messages_to_runtime_calls))
       CmdArgs.push_back("-fno-objc-convert-messages-to-runtime-calls");
   }
 
@@ -4290,7 +4324,8 @@
                             ? "-gpubnames"
                             : "-ggnu-pubnames");
   const auto *SimpleTemplateNamesArg =
-      Args.getLastArg(options::OPT_gsimple_template_names, options::OPT_gno_simple_template_names,
+      Args.getLastArg(options::OPT_gsimple_template_names,
+                      options::OPT_gno_simple_template_names,
                       options::OPT_gsimple_template_names_EQ);
   bool ForwardTemplateParams = DebuggerTuning == llvm::DebuggerKind::SCE;
   if (SimpleTemplateNamesArg &&
@@ -4391,6 +4426,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4403,7 +4439,7 @@
 
   // A header module compilation doesn't have a main input file, so invent a
   // fake one as a placeholder.
-  const char *ModuleName = [&]{
+  const char *ModuleName = [&] {
     auto *ModuleNameArg = Args.getLastArg(options::OPT_fmodule_name_EQ);
     return ModuleNameArg ? ModuleNameArg->getValue() : "";
   }();
@@ -4414,6 +4450,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4465,9 @@
             << types::getTypeName(Expected);
       }
       ModuleHeaderInputs.push_back(I);
+    } else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+      CudaHostInputs.push_back(I);
+      CudaDeviceInput = &I;
     } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
       CudaDeviceInput = &I;
     } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -4533,8 +4573,8 @@
     bool Failure =
         Triple.getArchName().substr(Offset).consumeInteger(10, Version);
     if (Failure || Version < 7)
-      D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName()
-                                                << TripleStr;
+      D.Diag(diag::err_target_unsupported_arch)
+          << Triple.getArchName() << TripleStr;
   }
 
   // Push all default warning arguments that are specific to
@@ -4604,9 +4644,8 @@
     if (JA.getType() == types::TY_Nothing)
       CmdArgs.push_back("-fsyntax-only");
     else if (JA.getType() == types::TY_ModuleFile)
-      CmdArgs.push_back(IsHeaderModulePrecompile
-                            ? "-emit-header-module"
-                            : "-emit-module-interface");
+      CmdArgs.push_back(IsHeaderModulePrecompile ? "-emit-header-module"
+                                                 : "-emit-module-interface");
     else
       CmdArgs.push_back("-emit-pch");
   } else if (isa<VerifyPCHJobAction>(JA)) {
@@ -4769,7 +4808,8 @@
 
     // Render ABI arguments
     switch (TC.getArch()) {
-    default: break;
+    default:
+      break;
     case llvm::Triple::arm:
     case llvm::Triple::armeb:
     case llvm::Triple::thumbeb:
@@ -5105,7 +5145,7 @@
   }
 
   CodeGenOptions::FramePointerKind FPKeepKind =
-                  getFramePointerKind(Args, RawTriple);
+      getFramePointerKind(Args, RawTriple);
   const char *FPKeepKindStr = nullptr;
   switch (FPKeepKind) {
   case CodeGenOptions::FramePointerKind::None:
@@ -5149,12 +5189,10 @@
                    options::OPT_fno_allow_editor_placeholders, false))
     CmdArgs.push_back("-fallow-editor-placeholders");
   if (Args.hasFlag(options::OPT_fstrict_vtable_pointers,
-                   options::OPT_fno_strict_vtable_pointers,
-                   false))
+                   options::OPT_fno_strict_vtable_pointers, false))
     CmdArgs.push_back("-fstrict-vtable-pointers");
   if (Args.hasFlag(options::OPT_fforce_emit_vtables,
-                   options::OPT_fno_force_emit_vtables,
-                   false))
+                   options::OPT_fno_force_emit_vtables, false))
     CmdArgs.push_back("-fforce-emit-vtables");
   if (!Args.hasFlag(options::OPT_foptimize_sibling_calls,
                     options::OPT_fno_optimize_sibling_calls))
@@ -5697,8 +5735,7 @@
                                   /*Joined=*/true);
       } else
         ImplyVCPPCVer = true;
-    }
-    else if (IsWindowsMSVC)
+    } else if (IsWindowsMSVC)
       ImplyVCPPCXXVer = true;
 
     Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs,
@@ -5806,7 +5843,7 @@
 
   if (const Arg *A = Args.getLastArg(options::OPT_fcf_runtime_abi_EQ)) {
     static const char *kCFABIs[] = {
-      "standalone", "objc", "swift", "swift-5.0", "swift-4.2", "swift-4.1",
+        "standalone", "objc", "swift", "swift-5.0", "swift-4.2", "swift-4.1",
     };
 
     if (find(kCFABIs, StringRef(A->getValue())) == std::end(kCFABIs))
@@ -5927,13 +5964,13 @@
           << A->getAsString(Args) << TripleStr;
   }
 
-
   if (Args.hasFlag(options::OPT_fvisibility_inlines_hidden,
-                    options::OPT_fno_visibility_inlines_hidden, false))
+                   options::OPT_fno_visibility_inlines_hidden, false))
     CmdArgs.push_back("-fvisibility-inlines-hidden");
 
-  Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var,
-                           options::OPT_fno_visibility_inlines_hidden_static_local_var);
+  Args.AddLastArg(CmdArgs,
+                  options::OPT_fvisibility_inlines_hidden_static_local_var,
+                  options::OPT_fno_visibility_inlines_hidden_static_local_var);
   Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden);
   Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ);
 
@@ -6319,8 +6356,8 @@
 
   ToolChain::RTTIMode RTTIMode = TC.getRTTIMode();
 
-  if (KernelOrKext || (types::isCXX(InputType) &&
-                       (RTTIMode == ToolChain::RM_Disabled)))
+  if (KernelOrKext ||
+      (types::isCXX(InputType) && (RTTIMode == ToolChain::RM_Disabled)))
     CmdArgs.push_back("-fno-rtti");
 
   // -fshort-enums=0 is default for all architectures except Hexagon and z/OS.
@@ -6654,16 +6691,16 @@
   if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) {
     StringRef value = inputCharset->getValue();
     if (!value.equals_insensitive("utf-8"))
-      D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args)
-                                          << value;
+      D.Diag(diag::err_drv_invalid_value)
+          << inputCharset->getAsString(Args) << value;
   }
 
   // -fexec_charset=UTF-8 is default. Reject others
   if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
     StringRef value = execCharset->getValue();
     if (!value.equals_insensitive("utf-8"))
-      D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
-                                          << value;
+      D.Diag(diag::err_drv_invalid_value)
+          << execCharset->getAsString(Args) << value;
   }
 
   RenderDiagnosticsOptions(D, Args, CmdArgs);
@@ -6827,15 +6864,13 @@
   // parser.
   // -finclude-default-header flag is for preprocessor,
   // do not pass it to other cc1 commands when save-temps is enabled
-  if (C.getDriver().isSaveTempsEnabled() &&
-      !isa<PreprocessJobAction>(JA)) {
+  if (C.getDriver().isSaveTempsEnabled() && !isa<PreprocessJobAction>(JA)) {
     for (auto Arg : Args.filtered(options::OPT_Xclang)) {
       Arg->claim();
       if (StringRef(Arg->getValue()) != "-finclude-default-header")
         CmdArgs.push_back(Arg->getValue());
     }
-  }
-  else {
+  } else {
     Args.AddAllArgValues(CmdArgs, options::OPT_Xclang);
   }
   for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
@@ -6911,10 +6946,10 @@
   // Host-side cuda compilation receives all device-side outputs in a single
   // fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary.
   if ((IsCuda || IsHIP) && CudaDeviceInput) {
-      CmdArgs.push_back("-fcuda-include-gpubinary");
-      CmdArgs.push_back(CudaDeviceInput->getFilename());
-      if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
-        CmdArgs.push_back("-fgpu-rdc");
+    CmdArgs.push_back("-fcuda-include-gpubinary");
+    CmdArgs.push_back(CudaDeviceInput->getFilename());
+    if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+      CmdArgs.push_back("-fgpu-rdc");
   }
 
   if (IsCuda) {
@@ -6970,6 +7005,7 @@
     auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
     for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
          ++TI, ++InputFile) {
+      assert(InputFile->isFilename() && "Offloading requires a filename");
       const ToolChain *TC = TI->second;
       const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
       StringRef File =
@@ -6982,6 +7018,20 @@
           TC->getTripleString() + "." +
           TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
     }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+    const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+    for (const auto &InputFile : CudaHostInputs) {
+      assert(InputFile.isFilename() && "Offloading requires a filename");
+      StringRef File =
+          C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+      StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+      // The CUDA toolchain should have a bound arch appended to the filename.
+      StringRef Arch = File.rsplit(".").first.rsplit('-').second;
+      CmdArgs.push_back(Args.MakeArgString(
+          "-fembed-offload-object=" + File + "," +
+          Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+          TC->getTripleString() + "." + Arch + "." + InputName));
+    }
   }
 
   if (Triple.isAMDGPU()) {
@@ -7093,14 +7143,14 @@
   }
 
   if (Args.hasArg(options::OPT_forder_file_instrumentation)) {
-     CmdArgs.push_back("-forder-file-instrumentation");
-     // Enable order file instrumentation when ThinLTO is not on. When ThinLTO is
-     // on, we need to pass these flags as linker flags and that will be handled
-     // outside of the compiler.
-     if (!IsUsingLTO) {
-       CmdArgs.push_back("-mllvm");
-       CmdArgs.push_back("-enable-order-file-instrumentation");
-     }
+    CmdArgs.push_back("-forder-file-instrumentation");
+    // Enable order file instrumentation when ThinLTO is not on. When ThinLTO is
+    // on, we need to pass these flags as linker flags and that will be handled
+    // outside of the compiler.
+    if (!IsUsingLTO) {
+      CmdArgs.push_back("-mllvm");
+      CmdArgs.push_back("-enable-order-file-instrumentation");
+    }
   }
 
   if (Arg *A = Args.getLastArg(options::OPT_fforce_enable_int128,
@@ -7226,8 +7276,8 @@
   if (Arg *A = Args.getLastArg(options::OPT_pg))
     if (FPKeepKind == CodeGenOptions::FramePointerKind::None &&
         !Args.hasArg(options::OPT_mfentry))
-      D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer"
-                                                      << A->getAsString(Args);
+      D.Diag(diag::err_drv_argument_not_allowed_with)
+          << "-fomit-frame-pointer" << A->getAsString(Args);
 
   // Claim some arguments which clang supports automatically.
 
@@ -7280,7 +7330,7 @@
           !getToolChain().getTriple().isOSBinFormatCOFF()) {
         getToolChain().getDriver().Diag(
             diag::err_drv_gnustep_objc_runtime_incompatible_binary)
-          << runtime.getVersion().getMajor();
+            << runtime.getVersion().getMajor();
       }
 
     runtimeArg->render(args, cmdArgs);
@@ -7579,11 +7629,10 @@
   if (VolatileOptionID == options::OPT__SLASH_volatile_ms)
     CmdArgs.push_back("-fms-volatile");
 
- if (Args.hasFlag(options::OPT__SLASH_Zc_dllexportInlines_,
-                  options::OPT__SLASH_Zc_dllexportInlines,
-                  false)) {
-  CmdArgs.push_back("-fno-dllexport-inlines");
- }
+  if (Args.hasFlag(options::OPT__SLASH_Zc_dllexportInlines_,
+                   options::OPT__SLASH_Zc_dllexportInlines, false)) {
+    CmdArgs.push_back("-fno-dllexport-inlines");
+  }
 
   Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg);
   Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb);
@@ -7735,7 +7784,7 @@
 }
 
 void ClangAs::AddRISCVTargetArgs(const ArgList &Args,
-                               ArgStringList &CmdArgs) const {
+                                 ArgStringList &CmdArgs) const {
   const llvm::Triple &Triple = getToolChain().getTriple();
   StringRef ABIName = riscv::getRISCVABI(Args, Triple);
 
@@ -7854,7 +7903,6 @@
   renderDwarfFormat(D, Triple, Args, CmdArgs, DwarfVersion);
   RenderDebugInfoCompressionArgs(Args, CmdArgs, D, getToolChain());
 
-
   // Handle -fPIC et al -- the relocation-model affects the assembler
   // for some targets.
   llvm::Reloc::Model RelocationModel;
@@ -7916,8 +7964,8 @@
     // only, not C/C++.
     if (Args.hasFlag(options::OPT_mdefault_build_attributes,
                      options::OPT_mno_default_build_attributes, true)) {
-        CmdArgs.push_back("-mllvm");
-        CmdArgs.push_back("-arm-add-build-attributes");
+      CmdArgs.push_back("-mllvm");
+      CmdArgs.push_back("-arm-add-build-attributes");
     }
     break;
 
@@ -8234,14 +8282,17 @@
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-    const ToolChain *TC = I.second;
-    if (TC->getTriple().isNVPTX()) {
-      CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-      if (CudaInstallation.isValid())
-        CmdArgs.push_back(Args.MakeArgString(
-            "--cuda-path=" + CudaInstallation.getInstallPath()));
-      break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+    auto TCRange = C.getOffloadToolChains(Kind);
+    for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+      const ToolChain *TC = I.second;
+      if (TC->getTriple().isNVPTX()) {
+        CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+        if (CudaInstallation.isValid())
+          CmdArgs.push_back(Args.MakeArgString(
+              "--cuda-path=" + CudaInstallation.getInstallPath()));
+        break;
+      }
     }
   }
 
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -456,6 +456,10 @@
       }
     }
 
+    // Using the new offloading driver implies relocatable device code.
+    if (A->getOption().matches(options::OPT_foffload_new_driver))
+      DAL->AddFlagArg(A, Opts.getOption(options::OPT_fgpu_rdc));
+
     // Pick up inputs via the -- option.
     if (A->getOption().matches(options::OPT__DASH_DASH)) {
       A->claim();
@@ -4088,6 +4092,101 @@
   Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
 }
 
+/// Returns the canonical name for the offloading architecture when using HIP or
+/// CUDA.
+static StringRef getCanonicalArchString(Compilation &C,
+                                        llvm::opt::DerivedArgList &Args,
+                                        StringRef ArchStr,
+                                        Action::OffloadKind Kind) {
+  if (Kind == Action::OFK_Cuda) {
+    CudaArch Arch = StringToCudaArch(ArchStr);
+    if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
+      C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+      return StringRef();
+    }
+    return Args.MakeArgStringRef(CudaArchToString(Arch));
+  } else if (Kind == Action::OFK_HIP) {
+    llvm::StringMap<bool> Features;
+    // getHIPOffloadTargetTriple() is known to return valid value as it has
+    // been called successfully in the CreateOffloadingDeviceToolChains().
+    auto Arch = parseTargetID(
+        *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), ArchStr,
+        &Features);
+    if (!Arch) {
+      C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
+      C.setContainsError();
+      return StringRef();
+    }
+    return Args.MakeArgStringRef(
+        getCanonicalTargetID(Arch.getValue(), Features));
+  }
+  return StringRef();
+}
+
+/// Checks whether the set of offloading architectures conflicts. Returns the
+/// incompatible pair if a conflict occurs.
+static llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
+getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
+                                  Action::OffloadKind Kind) {
+  if (Kind != Action::OFK_HIP)
+    return None;
+
+  std::set<StringRef> ArchSet;
+  llvm::copy(Archs, std::inserter(ArchSet, ArchSet.begin()));
+  return getConflictTargetIDCombination(ArchSet);
+}
+
+/// Returns the set of bound architectures active for this compilation kind.
+/// This function returns a set of bound architectures, if there are no bound
+/// architectures, we return a set containing only the empty string.
+static llvm::DenseSet<StringRef>
+getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args,
+                Action::OffloadKind Kind) {
+
+  // If this is OpenMP offloading we don't use a bound architecture.
+  if (Kind == Action::OFK_OpenMP)
+    return llvm::DenseSet<StringRef>{StringRef()};
+
+  // --offload and --offload-arch options are mutually exclusive.
+  if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
+      Args.hasArgNoClaim(options::OPT_offload_arch_EQ,
+                         options::OPT_no_offload_arch_EQ)) {
+    C.getDriver().Diag(diag::err_opt_not_valid_with_opt)
+        << "--offload"
+        << (Args.hasArgNoClaim(options::OPT_offload_arch_EQ)
+                ? "--offload-arch"
+                : "--no-offload-arch");
+  }
+
+  llvm::DenseSet<StringRef> Archs;
+  for (auto &Arg : Args) {
+    if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
+      Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+    } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
+      if (Arg->getValue() == StringRef("all"))
+        Archs.clear();
+      else
+        Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+    }
+  }
+
+  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) {
+    C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
+        << ConflictingArchs.getValue().first
+        << ConflictingArchs.getValue().second;
+    C.setContainsError();
+  }
+
+  if (Archs.empty()) {
+    if (Kind == Action::OFK_Cuda)
+      Archs.insert(CudaArchToString(DefaultCudaArch));
+    else if (Kind == Action::OFK_HIP)
+      Archs.insert(CudaArchToString(DefaultHIPArch));
+  }
+
+  return Archs;
+}
+
 Action *Driver::BuildOffloadingActions(Compilation &C,
                                        llvm::opt::DerivedArgList &Args,
                                        const InputTy &Input,
@@ -4100,12 +4199,18 @@
   types::ID InputType = Input.first;
   const Arg *InputArg = Input.second;
 
-  const Action::OffloadKind OffloadKinds[] = {Action::OFK_OpenMP};
+  const Action::OffloadKind OffloadKinds[] = {
+      Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP};
 
   for (Action::OffloadKind Kind : OffloadKinds) {
     SmallVector<const ToolChain *, 2> ToolChains;
     ActionList DeviceActions;
 
+    const bool Relocatable =
+        Kind == Action::OFK_OpenMP ||
+        Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
+                     /*Default=*/false);
+
     auto TCRange = C.getOffloadToolChains(Kind);
     for (auto TI = TCRange.first, TE = TCRange.second; TI != TE; ++TI)
       ToolChains.push_back(TI->second);
@@ -4113,7 +4218,18 @@
     if (ToolChains.empty())
       continue;
 
-    for (unsigned I = 0; I < ToolChains.size(); ++I)
+    if (!Relocatable) {
+      Diags.Report(diag::err_drv_non_relocatable);
+      return HostAction;
+    }
+
+    // Get the product of all bound architectures and toolchains.
+    SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
+    for (const ToolChain *TC : ToolChains)
+      for (StringRef Arch : getOffloadArchs(C, Args, Kind))
+        TCAndArchs.push_back(std::make_pair(TC, Arch));
+
+    for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
       DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType));
 
     if (DeviceActions.empty())
@@ -4127,7 +4243,7 @@
         break;
       }
 
-      auto TC = ToolChains.begin();
+      auto TCAndArch = TCAndArchs.begin();
       for (Action *&A : DeviceActions) {
         A = ConstructPhaseAction(C, Args, Phase, A, Kind);
 
@@ -4135,19 +4251,28 @@
           HostAction->setCannotBeCollapsedWithNextDependentAction();
           OffloadAction::HostDependence HDep(
               *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
-              /*BourdArch=*/nullptr, Action::OFK_OpenMP);
+              /*BoundArch=*/nullptr, Kind);
           OffloadAction::DeviceDependences DDep;
-          DDep.add(*A, **TC, /*BoundArch=*/nullptr, Kind);
+          DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind);
           A = C.MakeAction<OffloadAction>(HDep, DDep);
+          ++TCAndArch;
+        } else if (isa<AssembleJobAction>(A) && Kind == Action::OFK_Cuda) {
+          ActionList FatbinActions;
+          for (Action *A : {A, A->getInputs()[0]}) {
+            OffloadAction::DeviceDependences DDep;
+            DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
+            FatbinActions.emplace_back(
+                C.MakeAction<OffloadAction>(DDep, A->getType()));
+          }
+          A = C.MakeAction<LinkJobAction>(FatbinActions, types::TY_CUDA_FATBIN);
         }
-        ++TC;
       }
     }
 
-    auto TC = ToolChains.begin();
+    auto TCAndArch = TCAndArchs.begin();
     for (Action *A : DeviceActions) {
-      DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Kind);
-      TC++;
+      DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
+      ++TCAndArch;
     }
   }
 
@@ -4249,7 +4374,7 @@
       return C.MakeAction<BackendJobAction>(Input, Output);
     }
     if (isUsingLTO(/* IsOffload */ true) &&
-        TargetDeviceOffloadKind == Action::OFK_OpenMP) {
+        TargetDeviceOffloadKind != Action::OFK_None) {
       types::ID Output =
           Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2512,6 +2512,8 @@
   HelpText<"Use the static host OpenMP runtime while linking.">;
 def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
   HelpText<"Use the new driver for OpenMP offloading.">;
+def foffload_new_driver : Flag<["-"], "foffload-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
+  HelpText<"Use the new driver for offloading.">;
 def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
 def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
 defm escaping_block_tail_calls : BoolFOption<"escaping-block-tail-calls",
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -327,6 +327,8 @@
   "invalid value '%1' in '%0', value must be 'none' or a positive integer">;
 def err_drv_small_columns : Error<
   "invalid value '%1' in '%0', value must be '%2' or greater">;
+def err_drv_non_relocatable : Error<
+  "the new driver requires relocatable code, compile with '-fgpu-rdc' enabled">;
 
 def err_drv_invalid_malign_branch_EQ : Error<
   "invalid argument '%0' to -malign-branch=; each element must be one of: %1">;
Index: clang/include/clang/Basic/Cuda.h
===================================================================
--- clang/include/clang/Basic/Cuda.h
+++ clang/include/clang/Basic/Cuda.h
@@ -102,6 +102,9 @@
   LAST,
 };
 
+constexpr CudaArch DefaultCudaArch = CudaArch::SM_35;
+constexpr CudaArch DefaultHIPArch = CudaArch::GFX803;
+
 static inline bool IsNVIDIAGpuArch(CudaArch A) {
   return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to