Re: r242085 - [cuda] Driver changes to compile and stitch together host and device-side CUDA code.

Krzysztof Parzyszek Tue, 14 Jul 2015 10:34:42 -0700

The testcase test/Driver/cuda-options.cu fails on Hexagon and MIPS clang buildbots, but I can't see when it started to fail.


-Krzysztof



On 7/14/2015 5:20 AM, Manuel Klimek wrote:

Btw, this seems to create cuda actions when building with -fsyntax-only
(this is breaking tools that operate on cuda code, because they get an
error that only one compile action is expected).

On Tue, Jul 14, 2015 at 8:43 AM Justin Bogner <m...@justinbogner.com
<mailto:m...@justinbogner.com>> wrote:

    Artem Belevich <t...@google.com <mailto:t...@google.com>> writes:
     > Author: tra
     > Date: Mon Jul 13 18:27:56 2015
     > New Revision: 242085
     >
     > URL: http://llvm.org/viewvc/llvm-project?rev=242085&view=rev
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject-3Frev-3D242085-26view-3Drev&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=n86G5mEAJ9s0ikfRT1absPVmNPYjSGBakYbH86B22Vg&e=>
     > Log:
     > [cuda] Driver changes to compile and stitch together host and
    device-side CUDA code.
     >
     >   NOTE: reverts r242077 to reinstate r242058, r242065, 242067
     >         and includes fix for OS X test failures.
     >
     >   - Changed driver pipeline to compile host and device side of CUDA
     >     files and incorporate results of device-side compilation into
    host
     >     object file.
     >
     >   - Added a test for cuda pipeline creation in clang driver.
     >
     >   New clang options:
     >   --cuda-host-only   - Do host-side compilation only.
     >   --cuda-device-only - Do device-side compilation only.
     >
     >   --cuda-gpu-arch=<ARCH> - specify GPU architecture for device-side
     >     compilation. E.g. sm_35, sm_30. Default is sm_20. May be used
    more
     >     than once in which case one device-compilation will be done per
     >     unique specified GPU architecture.
     >
     >   Differential Revision: http://reviews.llvm.org/D9509
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__reviews.llvm.org_D9509&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=WDZ8ek1Ld-BGQdaVQW36Kddq-tnuTGgtLmj3LJL3Mbo&e=>
     >
     > Added:
     >     cfe/trunk/test/Driver/cuda-options.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__cuda-2Doptions.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=JETtyv_A1yjmTD_KmoFDoStKOHa9fmHDH6OI0wsJLU8&e=>
     >     cfe/trunk/test/Index/index-file.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__index-2Dfile.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=Z26JivvnfsF4tloeePDLQLHxsFQrEclL-D6_alidUsQ&e=>
     > Modified:
     >     cfe/trunk/include/clang/Driver/Action.h
     >     cfe/trunk/include/clang/Driver/Options.td
     >     cfe/trunk/include/clang/Driver/Types.def
     >     cfe/trunk/include/clang/Driver/Types.h
     >     cfe/trunk/lib/Driver/Action.cpp
     >     cfe/trunk/lib/Driver/Driver.cpp
     >     cfe/trunk/lib/Driver/ToolChain.cpp
     >     cfe/trunk/lib/Driver/ToolChains.cpp
     >     cfe/trunk/lib/Driver/ToolChains.h
     >     cfe/trunk/lib/Driver/Tools.cpp
     >     cfe/trunk/lib/Driver/Types.cpp
     >     cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp
     >     cfe/trunk/test/Index/attributes-cuda.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__attributes-2Dcuda.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=mTykXFMrswBOTJhzDBRffZ1C-BvcUHI5JLq6wIBSfbg&e=>
     >     cfe/trunk/tools/libclang/CIndex.cpp
     >     cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h
     >
     > Modified: cfe/trunk/include/clang/Driver/Action.h
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_include_clang_Driver_Action.h-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=vtHhQFgm5Qwu2PCa9zRkNf9giuN7kDBY0YAbrTGvHXU&e=>
     >
    
==============================================================================
     > --- cfe/trunk/include/clang/Driver/Action.h (original)
     > +++ cfe/trunk/include/clang/Driver/Action.h Mon Jul 13 18:27:56 2015
     > @@ -41,6 +41,8 @@ public:
     >    enum ActionClass {
     >      InputClass = 0,
     >      BindArchClass,
     > +    CudaDeviceClass,
     > +    CudaHostClass,
     >      PreprocessJobClass,
     >      PrecompileJobClass,
     >      AnalyzeJobClass,
     > @@ -133,6 +135,41 @@ public:
     >    }
     >  };
     >
     > +class CudaDeviceAction : public Action {
     > +  virtual void anchor();
     > +  /// GPU architecture to bind -- e.g 'sm_35'.
     > +  const char *GpuArchName;
     > +  /// True when action results are not consumed by the host
    action (e.g when
     > +  /// -fsyntax-only or --cuda-device-only options are used).
     > +  bool AtTopLevel;
     > +
     > +public:
     > +  CudaDeviceAction(std::unique_ptr<Action> Input, const char
    *ArchName,
     > +                   bool AtTopLevel);
     > +
     > +  const char *getGpuArchName() const { return GpuArchName; }
     > +  bool isAtTopLevel() const { return AtTopLevel; }
     > +
     > +  static bool classof(const Action *A) {
     > +    return A->getKind() == CudaDeviceClass;
     > +  }
     > +};
     > +
     > +class CudaHostAction : public Action {
     > +  virtual void anchor();
     > +  ActionList DeviceActions;
     > +
     > +public:
     > +  CudaHostAction(std::unique_ptr<Action> Input,
     > +                 const ActionList &DeviceActions);
     > +  ~CudaHostAction() override;
     > +
     > +  ActionList &getDeviceActions() { return DeviceActions; }
     > +  const ActionList &getDeviceActions() const { return
    DeviceActions; }
     > +
     > +  static bool classof(const Action *A) { return A->getKind() ==
    CudaHostClass; }
     > +};
     > +
     >  class JobAction : public Action {
     >    virtual void anchor();
     >  protected:
     >
     > Modified: cfe/trunk/include/clang/Driver/Options.td
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_include_clang_Driver_Options.td-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=1MRIlu0X9xy9Or54b4zLvE6SaBFx8eRFMa2VZzjakQw&e=>
     >
    
==============================================================================
     > --- cfe/trunk/include/clang/Driver/Options.td (original)
     > +++ cfe/trunk/include/clang/Driver/Options.td Mon Jul 13 18:27:56
    2015
     > @@ -351,6 +351,12 @@ def cxx_isystem : JoinedOrSeparate<["-"]
     >    MetaVarName<"<directory>">;
     >  def c : Flag<["-"], "c">, Flags<[DriverOption]>,
     >    HelpText<"Only run preprocess, compile, and assemble steps">;
     > +def cuda_device_only : Flag<["--"], "cuda-device-only">,
     > +  HelpText<"Do device-side CUDA compilation only">;
     > +def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">,
     > +  Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU
    architecture">;
     > +def cuda_host_only : Flag<["--"], "cuda-host-only">,
     > +  HelpText<"Do host-side CUDA compilation only">;
     >  def dA : Flag<["-"], "dA">, Group<d_Group>;
     >  def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
     >    HelpText<"Print macro definitions in -E mode in addition to
    normal output">;
     >
     > Modified: cfe/trunk/include/clang/Driver/Types.def
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.def?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_include_clang_Driver_Types.def-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=ZA_-9prtGSwjxyxYprruQhlLCw4qlQxdm8iD3MnfVHg&e=>
     >
    
==============================================================================
     > --- cfe/trunk/include/clang/Driver/Types.def (original)
     > +++ cfe/trunk/include/clang/Driver/Types.def Mon Jul 13 18:27:56 2015
     > @@ -44,6 +44,7 @@ TYPE("c",                        C,
     >  TYPE("cl",                       CL,           PP_C,
    "cl",    "u")
     >  TYPE("cuda-cpp-output",          PP_CUDA,      INVALID,
      "cui",   "u")
     >  TYPE("cuda",                     CUDA,         PP_CUDA,
      "cu",    "u")
     > +TYPE("cuda",                     CUDA_DEVICE,  PP_CUDA,
      "cu",    "")
     >  TYPE("objective-c-cpp-output",   PP_ObjC,      INVALID,
      "mi",    "u")
     >  TYPE("objc-cpp-output",          PP_ObjC_Alias, INVALID,
    "mi",    "u")
     >  TYPE("objective-c",              ObjC,         PP_ObjC,
      "m",     "u")
     >
     > Modified: cfe/trunk/include/clang/Driver/Types.h
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.h?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_include_clang_Driver_Types.h-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=MRbkm5exCU3h3iLWd3Gcx44y-ap92liIEwXTOMoKAQs&e=>
     >
    
==============================================================================
     > --- cfe/trunk/include/clang/Driver/Types.h (original)
     > +++ cfe/trunk/include/clang/Driver/Types.h Mon Jul 13 18:27:56 2015
     > @@ -63,6 +63,9 @@ namespace types {
     >    /// isCXX - Is this a "C++" input (C++ and Obj-C++ sources and
    headers).
     >    bool isCXX(ID Id);
     >
     > +  /// isCuda - Is this a CUDA input.
     > +  bool isCuda(ID Id);
     > +
     >    /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++
    sources and headers).
     >    bool isObjC(ID Id);
     >
     >
     > Modified: cfe/trunk/lib/Driver/Action.cpp
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_lib_Driver_Action.cpp-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=wqdIvIUiipcMnu3xYygaEDHjM6ohCAVUX2K7e4ULSpo&e=>
     >
    
==============================================================================
     > --- cfe/trunk/lib/Driver/Action.cpp (original)
     > +++ cfe/trunk/lib/Driver/Action.cpp Mon Jul 13 18:27:56 2015
     > @@ -24,6 +24,8 @@ const char *Action::getClassName(ActionC
     >    switch (AC) {
     >    case InputClass: return "input";
     >    case BindArchClass: return "bind-arch";
     > +  case CudaDeviceClass: return "cuda-device";
     > +  case CudaHostClass: return "cuda-host";
     >    case PreprocessJobClass: return "preprocessor";
     >    case PrecompileJobClass: return "precompiler";
     >    case AnalyzeJobClass: return "analyzer";
     > @@ -53,6 +55,25 @@ BindArchAction::BindArchAction(std::uniq
     >                                 const char *_ArchName)
     >      : Action(BindArchClass, std::move(Input)),
    ArchName(_ArchName) {}
     >
     > +void CudaDeviceAction::anchor() {}
     > +
     > +CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input,
     > +                                   const char *ArchName, bool
    AtTopLevel)
     > +    : Action(CudaDeviceClass, std::move(Input)),
    GpuArchName(ArchName),
     > +      AtTopLevel(AtTopLevel) {}
     > +
     > +void CudaHostAction::anchor() {}
     > +
     > +CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input,
     > +                               const ActionList &_DeviceActions)

    Identifiers starting with an underscore then a capital letter are
    reserved. Please just call this DeviceActions.

     > +    : Action(CudaHostClass, std::move(Input)),
    DeviceActions(_DeviceActions) {}
     > +
     > +CudaHostAction::~CudaHostAction() {
     > +  for (iterator it = DeviceActions.begin(), ie =
    DeviceActions.end(); it != ie;

    We usually name variables starting with a capital letter, so these
    should probably be `I` and `E` rather than `it` and `ie`. Might as well
    use auto here as well, since they're just iterators.

    This applies in several places later in this patch too. Please clean
    them up.

     > +       ++it)
     > +    delete *it;
     > +}
     > +
     >  void JobAction::anchor() {}
     >
     >  JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action>
    Input,
     >
     > Modified: cfe/trunk/lib/Driver/Driver.cpp
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_lib_Driver_Driver.cpp-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=-CzdHSqTWN30s5-qcBE0JFP-chY9pWXeFANVzz2hNlI&e=>
     >
    
==============================================================================
     > --- cfe/trunk/lib/Driver/Driver.cpp (original)
     > +++ cfe/trunk/lib/Driver/Driver.cpp Mon Jul 13 18:27:56 2015
     > @@ -174,8 +174,10 @@ phases::ID Driver::getFinalPhase(const D
     >    } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
     >      FinalPhase = phases::Backend;
     >
     > -    // -c only runs up to the assembler.
     > -  } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
     > +    // -c and partial CUDA compilations only run up to the
    assembler.
     > +  } else if ((PhaseArg = DAL.getLastArg(options::OPT_c)) ||
     > +             (PhaseArg =
    DAL.getLastArg(options::OPT_cuda_device_only)) ||
     > +             (PhaseArg =
    DAL.getLastArg(options::OPT_cuda_host_only))) {
     >      FinalPhase = phases::Assemble;
     >
     >      // Otherwise do everything.
     > @@ -900,9 +902,20 @@ static unsigned PrintActions1(const Comp
     >    } else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
     >      os << '"' << BIA->getArchName() << '"' << ", {"
     >         << PrintActions1(C, *BIA->begin(), Ids) << "}";
     > +  } else if (CudaDeviceAction *CDA =
    dyn_cast<CudaDeviceAction>(A)) {
     > +    os << '"' << CDA->getGpuArchName() << '"' << ", {"
     > +       << PrintActions1(C, *CDA->begin(), Ids) << "}";
     >    } else {
     > +    ActionList *AL;
     > +    if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
     > +      os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
     > +         << ", gpu binaries ";
     > +      AL = &CHA->getDeviceActions();
     > +    } else
     > +      AL = &A->getInputs();
     > +
     >      const char *Prefix = "{";
     > -    for (Action *PreRequisite : *A) {
     > +    for (Action *PreRequisite : *AL) {
     >        os << Prefix << PrintActions1(C, PreRequisite, Ids);
     >        Prefix = ", ";
     >      }
     > @@ -1215,6 +1228,93 @@ void Driver::BuildInputs(const ToolChain
     >    }
     >  }
     >
     > +// For each unique --cuda-gpu-arch= argument creates a
    TY_CUDA_DEVICE input
     > +// action and then wraps each in CudaDeviceAction paired with
    appropriate GPU
     > +// arch name. If we're only building device-side code, each
    action remains
     > +// independent. Otherwise we pass device-side actions as inputs
    to a new
     > +// CudaHostAction which combines both host and device side actions.
     > +static std::unique_ptr<Action>
     > +buildCudaActions(const Driver &D, const ToolChain &TC,
    DerivedArgList &Args,
     > +                 const Arg *InputArg, const types::ID InputType,
     > +                 std::unique_ptr<Action> Current, ActionList
    &Actions) {
     > +
     > +  assert(InputType == types::TY_CUDA &&
     > +         "CUDA Actions only apply to CUDA inputs.");
     > +
     > +  // Collect all cuda_gpu_arch parameters, removing duplicates.
     > +  SmallVector<const char *, 4> GpuArchList;
     > +  llvm::StringSet<> GpuArchNames;
     > +  for (Arg *A : Args) {
     > +    if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) {
     > +      A->claim();
     > +      if (GpuArchNames.insert(A->getValue()).second)
     > +        GpuArchList.push_back(A->getValue());
     > +    }
     > +  }
     > +
     > +  // Default to sm_20 which is the lowest common denominator for
    supported GPUs.
     > +  // sm_20 code should work correctly, if suboptimally, on all
    newer GPUs.
     > +  if (GpuArchList.empty())
     > +    GpuArchList.push_back("sm_20");
     > +
     > +  // Replicate inputs for each GPU architecture.
     > +  Driver::InputList CudaDeviceInputs;
     > +  for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
     > +
    CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE,
    InputArg));
     > +
     > +  // Build actions for all device inputs.
     > +  ActionList CudaDeviceActions;
     > +  D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions);
     > +  assert(GpuArchList.size() == CudaDeviceActions.size() &&
     > +         "Failed to create actions for all devices");
     > +
     > +  // Check whether any of device actions stopped before they
    could generate PTX.
     > +  bool PartialCompilation = false;
     > +  bool DeviceOnlyCompilation =
    Args.hasArg(options::OPT_cuda_device_only);

    The ordering here is confusing - it looks like DeviceOnlyCompilation is
    related to the loop that sets PartialCompilation.

     > +  for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) {
     > +    if (CudaDeviceActions[i]->getKind() !=
    Action::BackendJobClass) {
     > +      PartialCompilation = true;
     > +      break;
     > +    }
     > +  }
     > +
     > +  // Figure out what to do with device actions -- pass them as
    inputs to the
     > +  // host action or run each of them independently.
     > +  if (PartialCompilation || DeviceOnlyCompilation) {
     > +    // In case of partial or device-only compilation results of
    device actions
     > +    // are not consumed by the host action device actions have
    to be added to
     > +    // top-level actions list with AtTopLevel=true and run
    independently.
     > +
     > +    // -o is ambiguous if we have more than one top-level action.
     > +    if (Args.hasArg(options::OPT_o) &&
     > +        (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
     > +
    D.Diag(clang::diag::err_drv_output_argument_with_multiple_files);
     > +      return nullptr;
     > +    }
     > +
     > +    for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
     > +      Actions.push_back(
     > +          new
    CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]),
     > +                               GpuArchList[i], /* AtTopLevel */
    true));
     > +    // Kill host action in case of device-only compilation.
     > +    if (DeviceOnlyCompilation)
     > +      Current.reset(nullptr);
     > +    return Current;
     > +  } else {

    Since the `if` returns unconditionally, an early exit is better than an
    else.

     > +    // Outputs of device actions during complete CUDA
    compilation get created
     > +    // with AtTopLevel=false and become inputs for the host action.
     > +    ActionList DeviceActions;
     > +    for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
     > +      DeviceActions.push_back(
     > +          new
    CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]),
     > +                               GpuArchList[i], /* AtTopLevel */
    false));
     > +    // Return a new host action that incorporates original host
    action and all
     > +    // device actions.
     > +    return std::unique_ptr<Action>(
     > +        new CudaHostAction(std::move(Current), DeviceActions));
     > +  }
     > +}
     > +
     >  void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
     >                            const InputList &Inputs, ActionList
    &Actions) const {
     >    llvm::PrettyStackTraceString CrashInfo("Building compilation
    actions");
     > @@ -1312,6 +1412,25 @@ void Driver::BuildActions(const ToolChai
     >        continue;
     >      }
     >
     > +    phases::ID CudaInjectionPhase;
     > +    if (isSaveTempsEnabled()) {
     > +      // All phases are done independently, inject GPU blobs
    during compilation
     > +      // phase as that's where we generate glue code to init them.
     > +      CudaInjectionPhase = phases::Compile;
     > +    } else {
     > +      // Assumes that clang does everything up until linking
    phase, so we inject
     > +      // cuda device actions at the last step before linking.
    Otherwise CUDA
     > +      // host action forces preprocessor into a separate invocation.
     > +      if (FinalPhase == phases::Link) {

    This is `else if`. Nesting the if inside the else is confusing.

     > +        for (auto i = PL.begin(), e = PL.end(); i != e; ++i) {
     > +          auto next = i + 1;
     > +          if (next != e && *next == phases::Link)

    What if i == e?

     > +            CudaInjectionPhase = *i;
     > +        }
     > +      } else
     > +        CudaInjectionPhase = FinalPhase;
     > +    }
     > +
     >      // Build the pipeline for this file.
     >      std::unique_ptr<Action> Current(new InputAction(*InputArg,
    InputType));
     >      for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e
    = PL.end();
     > @@ -1337,6 +1456,15 @@ void Driver::BuildActions(const ToolChai
     >
     >        // Otherwise construct the appropriate action.
     >        Current = ConstructPhaseAction(TC, Args, Phase,
    std::move(Current));
     > +
     > +      if (InputType == types::TY_CUDA && Phase ==
    CudaInjectionPhase &&
     > +          !Args.hasArg(options::OPT_cuda_host_only)) {
     > +        Current = buildCudaActions(*this, TC, Args, InputArg,
    InputType,
     > +                                   std::move(Current), Actions);
     > +        if (!Current)
     > +          break;
     > +      }

    This whole block of code seems out of place. In fact, why doesn't CUDA
    just have its own phases::ID? It seems like that would simplify all of
    this.

     > +
     >        if (Current->getType() == types::TY_Nothing)
     >          break;
     >      }
     > @@ -1576,7 +1704,13 @@ static const Tool *SelectToolForJob(Comp
     >    if (isa<BackendJobAction>(JA)) {
     >      // Check if the compiler supports emitting LLVM IR.
     >      assert(Inputs->size() == 1);
     > -    JobAction *CompileJA = cast<CompileJobAction>(*Inputs->begin());
     > +    JobAction *CompileJA;
     > +    // Extract real host action, if it's a CudaHostAction.
     > +    if (CudaHostAction *CudaHA =
    dyn_cast<CudaHostAction>(*Inputs->begin()))
     > +      CompileJA = cast<CompileJobAction>(*CudaHA->begin());
     > +    else
     > +      CompileJA = cast<CompileJobAction>(*Inputs->begin());

    This seems kind of awkward. Not sure I have a better suggestion though.

     > +
     >      const Tool *Compiler = TC->SelectTool(*CompileJA);
     >      if (!Compiler)
     >        return nullptr;
     > @@ -1610,6 +1744,20 @@ void Driver::BuildJobsForAction(Compilat
     >                                  InputInfo &Result) const {
     >    llvm::PrettyStackTraceString CrashInfo("Building compilation
    jobs");
     >
     > +  InputInfoList CudaDeviceInputInfos;
     > +  if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
     > +    InputInfo II;
     > +    // Append outputs of device jobs to the input list.
     > +    for (const Action *DA : CHA->getDeviceActions()) {
     > +      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
     > +                         /*MultipleArchs*/ false, LinkingOutput,
    II);
     > +      CudaDeviceInputInfos.push_back(II);
     > +    }
     > +    // Override current action with a real host compile action
    and continue
     > +    // processing it.
     > +    A = *CHA->begin();
     > +  }
     > +
     >    if (const InputAction *IA = dyn_cast<InputAction>(A)) {
     >      // FIXME: It would be nice to not claim this here; maybe the
    old scheme of
     >      // just using Args was better?
     > @@ -1635,11 +1783,24 @@ void Driver::BuildJobsForAction(Compilat
     >      else
     >        TC = &C.getDefaultToolChain();
     >
     > -    BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(),
    AtTopLevel,
     > +    BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel,

    I guess this is just an unrelated cleanup? Better to commit those
    separately in the future.

     >                         MultipleArchs, LinkingOutput, Result);
     >      return;
     >    }
     >
     > +  if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
     > +    // Figure out which NVPTX triple to use for device-side
    compilation based on
     > +    // whether host is 64-bit.
     > +    llvm::Triple
    DeviceTriple(C.getDefaultToolChain().getTriple().isArch64Bit()
     > +                                  ? "nvptx64-nvidia-cuda"
     > +                                  : "nvptx-nvidia-cuda");

    It seems like a pretty bad layering violation to need to know these
    particular triples right here...

    Is getDefaultToolChain() even the right thing to do here? Isn't `TC`
    more appropriate?

     > +    BuildJobsForAction(C, *CDA->begin(),
     > +                       &getToolChain(C.getArgs(), DeviceTriple),
     > +                       CDA->getGpuArchName(), CDA->isAtTopLevel(),
     > +                       /*MultipleArchs*/ true, LinkingOutput,
    Result);
     > +    return;
     > +  }
     > +
     >    const ActionList *Inputs = &A->getInputs();
     >
     >    const JobAction *JA = cast<JobAction>(A);
     > @@ -1671,6 +1832,10 @@ void Driver::BuildJobsForAction(Compilat
     >    if (JA->getType() == types::TY_dSYM)
     >      BaseInput = InputInfos[0].getFilename();
     >
     > +  // Append outputs of cuda device jobs to the input list
     > +  if (CudaDeviceInputInfos.size())
     > +    InputInfos.append(CudaDeviceInputInfos.begin(),
    CudaDeviceInputInfos.end());
     > +
     >    // Determine the place to write output to, if any.
     >    if (JA->getType() == types::TY_Nothing)
     >      Result = InputInfo(A->getType(), BaseInput);
     > @@ -2052,6 +2217,9 @@ const ToolChain &Driver::getToolChain(co
     >          break;
     >        }
     >        break;
     > +    case llvm::Triple::CUDA:
     > +      TC = new toolchains::CudaToolChain(*this, Target, Args);
     > +      break;
     >      default:
     >        // Of these targets, Hexagon is the only one that might have
     >        // an OS of Linux, in which case it got handled above already.
     >
     > Modified: cfe/trunk/lib/Driver/ToolChain.cpp
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChain.cpp?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_lib_Driver_ToolChain.cpp-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=qqzI495chQgaGHf5tBIJFYdjfnbBqT1xCg4lX1igKik&e=>
     >
    
==============================================================================
     > --- cfe/trunk/lib/Driver/ToolChain.cpp (original)
     > +++ cfe/trunk/lib/Driver/ToolChain.cpp Mon Jul 13 18:27:56 2015
     > @@ -151,6 +151,8 @@ Tool *ToolChain::getTool(Action::ActionC
     >
     >    case Action::InputClass:
     >    case Action::BindArchClass:
     > +  case Action::CudaDeviceClass:
     > +  case Action::CudaHostClass:
     >    case Action::LipoJobClass:
     >    case Action::DsymutilJobClass:
     >    case Action::VerifyDebugInfoJobClass:
     >
     > Modified: cfe/trunk/lib/Driver/ToolChains.cpp
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains.cpp?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_lib_Driver_ToolChains.cpp-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=M6UcaDzO1V5zfrGrbm-oagDHi1FgOIvQGwAWJltl778&e=>
     >
    
==============================================================================
     > --- cfe/trunk/lib/Driver/ToolChains.cpp (original)
     > +++ cfe/trunk/lib/Driver/ToolChains.cpp Mon Jul 13 18:27:56 2015
     > @@ -3652,6 +3652,65 @@ Tool *DragonFly::buildLinker() const {
     >    return new tools::dragonfly::Linker(*this);
     >  }
     >
     > +/// Stub for CUDA toolchain. At the moment we don't have
    assembler or
     > +/// linker and need toolchain mainly to propagate device-side
    options
     > +/// to CC1.
     > +
     > +CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple
    &Triple,
     > +                             const ArgList &Args)
     > +    : Linux(D, Triple, Args) {}
     > +
     > +void
     > +CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList
    &DriverArgs,
     > +                                     llvm::opt::ArgStringList
    &CC1Args) const {
     > +  Linux::addClangTargetOptions(DriverArgs, CC1Args);
     > +  CC1Args.push_back("-fcuda-is-device");
     > +}
     > +
     > +llvm::opt::DerivedArgList *
     > +CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
     > +                             const char *BoundArch) const {
     > +  DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
     > +  const OptTable &Opts = getDriver().getOpts();
     > +
     > +  for (Arg *A : Args) {
     > +    if (A->getOption().matches(options::OPT_Xarch__)) {
     > +      // Skip this argument unless the architecture matches
    BoundArch
     > +      if (A->getValue(0) != StringRef(BoundArch))
     > +        continue;
     > +
     > +      unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
     > +      unsigned Prev = Index;
     > +      std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
     > +
     > +      // If the argument parsing failed or more than one
    argument was
     > +      // consumed, the -Xarch_ argument's parameter tried to consume
     > +      // extra arguments. Emit an error and ignore.
     > +      //
     > +      // We also want to disallow any options which would alter the
     > +      // driver behavior; that isn't going to work in our model. We
     > +      // use isDriverOption() as an approximation, although things
     > +      // like -O4 are going to slip through.
     > +      if (!XarchArg || Index > Prev + 1) {
     > +
    getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
     > +            << A->getAsString(Args);
     > +        continue;
     > +      } else if
    (XarchArg->getOption().hasFlag(options::DriverOption)) {
     > +
    getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
     > +            << A->getAsString(Args);
     > +        continue;
     > +      }
     > +      XarchArg->setBaseArg(A);
     > +      A = XarchArg.release();
     > +      DAL->AddSynthesizedArg(A);
     > +    }
     > +    DAL->append(A);
     > +  }
     > +
     > +  DAL->AddJoinedArg(nullptr,
    Opts.getOption(options::OPT_march_EQ), BoundArch);
     > +  return DAL;
     > +}
     > +
     >  /// XCore tool chain
     >  XCore::XCore(const Driver &D, const llvm::Triple &Triple, const
    ArgList &Args)
     >      : ToolChain(D, Triple, Args) {
     >
     > Modified: cfe/trunk/lib/Driver/ToolChains.h
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains.h?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_lib_Driver_ToolChains.h-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=ozln2maQC62P_Kc4R1iHZXtNIeMs9b33UDhfWr_8u_o&e=>
     >
    
==============================================================================
     > --- cfe/trunk/lib/Driver/ToolChains.h (original)
     > +++ cfe/trunk/lib/Driver/ToolChains.h Mon Jul 13 18:27:56 2015
     > @@ -699,6 +699,18 @@ private:
     >    std::string computeSysRoot() const;
     >  };
     >
     > +class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux {
     > +public:
     > +  CudaToolChain(const Driver &D, const llvm::Triple &Triple,
     > +                const llvm::opt::ArgList &Args);
     > +
     > +  llvm::opt::DerivedArgList *
     > +  TranslateArgs(const llvm::opt::DerivedArgList &Args,
     > +                const char *BoundArch) const override;
     > +  void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
     > +                             llvm::opt::ArgStringList &CC1Args)
    const override;
     > +};
     > +
     >  class LLVM_LIBRARY_VISIBILITY Hexagon_TC : public Linux {
     >  protected:
     >    GCCVersion GCCLibAndIncVersion;
     >
     > Modified: cfe/trunk/lib/Driver/Tools.cpp
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_lib_Driver_Tools.cpp-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=CwavFMr1KVZKGP2wUxyDOFt5ZPH6qp9lh94wZ7M_THo&e=>
     >
    
==============================================================================
     > --- cfe/trunk/lib/Driver/Tools.cpp (original)
     > +++ cfe/trunk/lib/Driver/Tools.cpp Mon Jul 13 18:27:56 2015
     > @@ -1488,6 +1488,12 @@ static std::string getCPUName(const ArgL
     >      return CPUName;
     >    }
     >
     > +  case llvm::Triple::nvptx:
     > +  case llvm::Triple::nvptx64:
     > +    if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
     > +      return A->getValue();
     > +    return "";
     > +
     >    case llvm::Triple::ppc:
     >    case llvm::Triple::ppc64:
     >    case llvm::Triple::ppc64le: {
     > @@ -2826,8 +2832,14 @@ void Clang::ConstructJob(Compilation &C,
     >        getToolChain().getTriple().isWindowsCygwinEnvironment();
     >    bool IsWindowsMSVC =
    getToolChain().getTriple().isWindowsMSVCEnvironment();
     >
     > -  assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
     > +  // Check number of inputs for sanity. We need at least one input.
     > +  assert(Inputs.size() >= 1 && "Must have at least one input.");
     >    const InputInfo &Input = Inputs[0];
     > +  // CUDA compilation may have multiple inputs (source file +
    results of
     > +  // device-side compilations). All other jobs are expected to
    have exactly one
     > +  // input.
     > +  bool IsCuda = types::isCuda(Input.getType());
     > +  assert((IsCuda || Inputs.size() == 1) && "Unable to handle
    multiple inputs.");
     >
     >    // Invoke ourselves in -cc1 mode.
     >    //
     > @@ -4812,14 +4824,12 @@ void Clang::ConstructJob(Compilation &C,
     >      assert(Output.isNothing() && "Invalid output.");
     >    }
     >
     > -  for (const auto &II : Inputs) {
     > -    addDashXForInput(Args, II, CmdArgs);
     > +  addDashXForInput(Args, Input, CmdArgs);
     >
     > -    if (II.isFilename())
     > -      CmdArgs.push_back(II.getFilename());
     > -    else
     > -      II.getInputArg().renderAsInput(Args, CmdArgs);
     > -  }
     > +  if (Input.isFilename())
     > +    CmdArgs.push_back(Input.getFilename());
     > +  else
     > +    Input.getInputArg().renderAsInput(Args, CmdArgs);
     >
     >    Args.AddAllArgs(CmdArgs, options::OPT_undef);
     >
     > @@ -4857,6 +4867,16 @@ void Clang::ConstructJob(Compilation &C,
     >      CmdArgs.push_back(SplitDwarfOut);
     >    }
     >
     > +  // Host-side cuda compilation receives device-side outputs as
    Inputs[1...].
     > +  // Include them with -fcuda-include-gpubinary.
     > +  if (IsCuda && Inputs.size() > 1)
     > +    for (InputInfoList::const_iterator it =
    std::next(Inputs.begin()),
     > +                                       ie = Inputs.end();
     > +         it != ie; ++it) {
     > +      CmdArgs.push_back("-fcuda-include-gpubinary");
     > +      CmdArgs.push_back(it->getFilename());
     > +    }
     > +
     >    // Finally add the compile command to the compilation.
     >    if (Args.hasArg(options::OPT__SLASH_fallback) &&
     >        Output.getType() == types::TY_Object &&
     >
     > Modified: cfe/trunk/lib/Driver/Types.cpp
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Types.cpp?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_lib_Driver_Types.cpp-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=9AMjh1hCYVutEDGvGsrfutLiN-liIV6zlcJutmRqmWk&e=>
     >
    
==============================================================================
     > --- cfe/trunk/lib/Driver/Types.cpp (original)
     > +++ cfe/trunk/lib/Driver/Types.cpp Mon Jul 13 18:27:56 2015
     > @@ -86,6 +86,7 @@ bool types::isAcceptedByClang(ID Id) {
     >    case TY_C: case TY_PP_C:
     >    case TY_CL:
     >    case TY_CUDA: case TY_PP_CUDA:
     > +  case TY_CUDA_DEVICE:
     >    case TY_ObjC: case TY_PP_ObjC: case TY_PP_ObjC_Alias:
     >    case TY_CXX: case TY_PP_CXX:
     >    case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias:
     > @@ -122,7 +123,19 @@ bool types::isCXX(ID Id) {
     >    case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias:
     >    case TY_CXXHeader: case TY_PP_CXXHeader:
     >    case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader:
     > -  case TY_CUDA: case TY_PP_CUDA:
     > +  case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE:
     > +    return true;
     > +  }
     > +}
     > +
     > +bool types::isCuda(ID Id) {
     > +  switch (Id) {
     > +  default:
     > +    return false;
     > +
     > +  case TY_CUDA:
     > +  case TY_PP_CUDA:
     > +  case TY_CUDA_DEVICE:
     >      return true;
     >    }
     >  }
     > @@ -206,10 +219,12 @@ void types::getCompilationPhases(ID Id,
     >          P.push_back(phases::Compile);
     >          P.push_back(phases::Backend);
     >        }
     > -      P.push_back(phases::Assemble);
     > +      if (Id != TY_CUDA_DEVICE)
     > +        P.push_back(phases::Assemble);
     >      }
     >    }
     > -  if (!onlyPrecompileType(Id)) {
     > +
     > +  if (!onlyPrecompileType(Id) && Id != TY_CUDA_DEVICE) {
     >      P.push_back(phases::Link);
     >    }
     >    assert(0 < P.size() && "Not enough phases in list");
     >
     > Modified: cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_lib_Frontend_CreateInvocationFromCommandLine.cpp-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=wg3t3SrkNqbaYgAoHetHahNjzYN60KNOwlZ5uzG-8HM&e=>
     >
    
==============================================================================
     > --- cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp
    (original)
     > +++ cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp
    Mon Jul 13 18:27:56 2015
     > @@ -15,6 +15,7 @@
     >  #include "clang/Basic/DiagnosticOptions.h"
     >  #include "clang/Driver/Compilation.h"
     >  #include "clang/Driver/Driver.h"
     > +#include "clang/Driver/Action.h"
     >  #include "clang/Driver/Options.h"
     >  #include "clang/Driver/Tool.h"
     >  #include "clang/Frontend/CompilerInstance.h"
     > @@ -61,9 +62,25 @@ clang::createInvocationFromCommandLine(A
     >    }
     >
     >    // We expect to get back exactly one command job, if we didn't
    something
     > -  // failed.
     > +  // failed. CUDA compilation is an exception as it creates
    multiple jobs. If
     > +  // that's the case, we proceed with the first job. If caller
    needs particular
     > +  // CUDA job, it should be controlled via
    --cuda-{host|device}-only option
     > +  // passed to the driver.
     >    const driver::JobList &Jobs = C->getJobs();
     > -  if (Jobs.size() != 1 || !isa<driver::Command>(*Jobs.begin())) {
     > +  bool CudaCompilation = false;
     > +  if (Jobs.size() > 1) {
     > +    for (auto &A : C->getActions()){
     > +      // On MacOSX real actions may end up being wrapped in
    BindArchAction
     > +      if (isa<driver::BindArchAction>(A))
     > +        A = *A->begin();
     > +      if (isa<driver::CudaDeviceAction>(A)) {
     > +        CudaCompilation = true;
     > +        break;
     > +      }
     > +    }
     > +  }
     > +  if (Jobs.size() == 0 || !isa<driver::Command>(*Jobs.begin()) ||
     > +      (Jobs.size() > 1 && !CudaCompilation)) {
     >      SmallString<256> Msg;
     >      llvm::raw_svector_ostream OS(Msg);
     >      Jobs.Print(OS, "; ", true);
     >
     > Added: cfe/trunk/test/Driver/cuda-options.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__cuda-2Doptions.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=JETtyv_A1yjmTD_KmoFDoStKOHa9fmHDH6OI0wsJLU8&e=>
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-options.cu?rev=242085&view=auto
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_test_Driver_cuda-2Doptions.cu-3Frev-3D242085-26view-3Dauto&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=FylP1lgV5yOJ3MWPvEkQxpMnX6K1fmsutyfjY1CPFVg&e=>
     >
    
==============================================================================
     > --- cfe/trunk/test/Driver/cuda-options.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__cuda-2Doptions.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=JETtyv_A1yjmTD_KmoFDoStKOHa9fmHDH6OI0wsJLU8&e=>
    (added)
     > +++ cfe/trunk/test/Driver/cuda-options.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__cuda-2Doptions.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=JETtyv_A1yjmTD_KmoFDoStKOHa9fmHDH6OI0wsJLU8&e=>
    Mon Jul 13 18:27:56 2015
     > @@ -0,0 +1,109 @@
     > +// Tests CUDA compilation pipeline construction in Driver.
     > +// REQUIRES: clang-driver
     > +
     > +// Simple compilation case:
     > +// RUN: %clang -### -c %s 2>&1 \
     > +// Compile device-side to PTX assembly and make sure we use it
    on the host side.
     > +// RUN:   | FileCheck -check-prefix CUDA-D1 \
     > +// Then compile host side and incorporate device code.
     > +// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
     > +// Make sure we don't link anything.
     > +// RUN:   -check-prefix CUDA-NL %s
     > +
     > +// Typical compilation + link case:
     > +// RUN: %clang -### %s 2>&1 \
     > +// Compile device-side to PTX assembly and make sure we use it
    on the host side
     > +// RUN:   | FileCheck -check-prefix CUDA-D1 \
     > +// Then compile host side and incorporate device code.
     > +// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
     > +// Then link things.
     > +// RUN:   -check-prefix CUDA-L %s
     > +
     > +// Verify that --cuda-host-only disables device-side compilation
    and linking
     > +// RUN: %clang -### --cuda-host-only %s 2>&1 \
     > +// Make sure we didn't run device-side compilation.
     > +// RUN:   | FileCheck -check-prefix CUDA-ND \
     > +// Then compile host side and make sure we don't attempt to
    incorporate GPU code.
     > +// RUN:    -check-prefix CUDA-H -check-prefix CUDA-H-NI \
     > +// Make sure we don't link anything.
     > +// RUN:    -check-prefix CUDA-NL %s
     > +
     > +// Verify that --cuda-device-only disables host-side compilation and
    linking
     > +// RUN: %clang -### --cuda-device-only %s 2>&1 \
     > +// Compile device-side to PTX assembly
     > +// RUN:   | FileCheck -check-prefix CUDA-D1 \
     > +// Make sure there is no host compilation or linking.
     > +// RUN:   -check-prefix CUDA-NH -check-prefix CUDA-NL %s
     > +
     > +// Verify that with -S we compile host and device sides to assembly
     > +// and incorporate device code on the host side.
     > +// RUN: %clang -### -S -c %s 2>&1 \
     > +// Compile device-side to PTX assembly
     > +// RUN:   | FileCheck -check-prefix CUDA-D1 \
     > +// Then compile host side and incorporate GPU code.
     > +// RUN:  -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
     > +// Make sure we don't link anything.
     > +// RUN:  -check-prefix CUDA-NL %s
     > +
     > +// Verify that --cuda-gpu-arch option passes correct GPU
     > +// architecture info to device compilation.
     > +// RUN: %clang -### --cuda-gpu-arch=sm_35 -c %s 2>&1 \
     > +// Compile device-side to PTX assembly.
     > +// RUN:   | FileCheck -check-prefix CUDA-D1 -check-prefix
    CUDA-D1-SM35 \
     > +// Then compile host side and incorporate GPU code.
     > +// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
     > +// Make sure we don't link anything.
     > +// RUN:   -check-prefix CUDA-NL %s
     > +
     > +// Verify that there is device-side compilation per
    --cuda-gpu-arch args
     > +// and that all results are included on the host side.
     > +// RUN: %clang -### --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30
    -c %s 2>&1 \
     > +// Compile both device-sides to PTX assembly
     > +// RUN:   | FileCheck \
     > +// RUN: -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \
     > +// RUN: -check-prefix CUDA-D2 -check-prefix CUDA-D2-SM30 \
     > +// Then compile host side and incorporate both device-side outputs
     > +// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1
    -check-prefix CUDA-H-I2 \
     > +// Make sure we don't link anything.
     > +// RUN:   -check-prefix CUDA-NL %s
     > +
     > +// Match device-side compilation
     > +// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
     > +// CUDA-D1-SAME: "-fcuda-is-device"
     > +// CUDA-D1-SM35-SAME: "-target-cpu" "sm_35"
     > +// CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]"
     > +// CUDA-D1-SAME: "-x" "cuda"
     > +
     > +// Match another device-side compilation
     > +// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
     > +// CUDA-D2-SAME: "-fcuda-is-device"
     > +// CUDA-D2-SM30-SAME: "-target-cpu" "sm_30"
     > +// CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
     > +// CUDA-D2-SAME: "-x" "cuda"
     > +
     > +// Match no device-side compilation
     > +// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda"
     > +// CUDA-ND-SAME-NOT: "-fcuda-is-device"
     > +
     > +// Match host-side compilation
     > +// CUDA-H: "-cc1" "-triple"
     > +// CUDA-H-SAME-NOT: "nvptx{{64?}}-nvidia-cuda"
     > +// CUDA-H-SAME-NOT: "-fcuda-is-device"
     > +// CUDA-H-SAME: "-o" "[[HOSTOBJ:[^"]*]]"
     > +// CUDA-H-SAME: "-x" "cuda"
     > +// CUDA-H-I1-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY1]]"
     > +// CUDA-H-I2-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY2]]"
     > +
     > +// Match no GPU code inclusion.
     > +// CUDA-H-NI-NOT: "-fcuda-include-gpubinary"
     > +
     > +// Match no CUDA compilation
     > +// CUDA-NH-NOT: "-cc1" "-triple"
     > +// CUDA-NH-SAME-NOT: "-x" "cuda"
     > +
     > +// Match linker
     > +// CUDA-L: "{{.*}}ld{{(.exe)?}}"
     > +// CUDA-L-SAME: "[[HOSTOBJ]]"
     > +
     > +// Match no linker
     > +// CUDA-NL-NOT: "{{.*}}ld{{(.exe)?}}"
     >
     > Modified: cfe/trunk/test/Index/attributes-cuda.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__attributes-2Dcuda.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=mTykXFMrswBOTJhzDBRffZ1C-BvcUHI5JLq6wIBSfbg&e=>
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/attributes-cuda.cu?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_test_Index_attributes-2Dcuda.cu-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=9lgGnxaMaTne6u5dnj6LAjTqikN03kyo67ajgqFhssk&e=>
     >
    
==============================================================================
     > --- cfe/trunk/test/Index/attributes-cuda.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__attributes-2Dcuda.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=mTykXFMrswBOTJhzDBRffZ1C-BvcUHI5JLq6wIBSfbg&e=>
    (original)
     > +++ cfe/trunk/test/Index/attributes-cuda.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__attributes-2Dcuda.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=mTykXFMrswBOTJhzDBRffZ1C-BvcUHI5JLq6wIBSfbg&e=>
    Mon Jul 13 18:27:56 2015
     > @@ -1,4 +1,6 @@
     >  // RUN: c-index-test -test-load-source all -x cuda %s | FileCheck %s
     > +// RUN: c-index-test -test-load-source all -x cuda
    --cuda-host-only %s | FileCheck %s
     > +// RUN: c-index-test -test-load-source all -x cuda
    --cuda-device-only %s | FileCheck %s
     >
     >  __attribute__((device)) void f_device();
     >  __attribute__((global)) void f_global();
     > @@ -6,13 +8,13 @@ __attribute__((constant)) int* g_constan
     >  __attribute__((shared)) float *g_shared;
     >  __attribute__((host)) void f_host();
     >
     > -// CHECK:       attributes-cuda.cu:3:30: FunctionDecl=f_device:3:30
     > -// CHECK-NEXT:  attributes-cuda.cu:3:16: attribute(device)
     > -// CHECK:       attributes-cuda.cu:4:30: FunctionDecl=f_global:4:30
     > -// CHECK-NEXT:  attributes-cuda.cu:4:16: attribute(global)
     > -// CHECK:       attributes-cuda.cu:5:32: VarDecl=g_constant:5:32
    (Definition)
     > -// CHECK-NEXT:  attributes-cuda.cu:5:16: attribute(constant)
     > -// CHECK:       attributes-cuda.cu:6:32: VarDecl=g_shared:6:32
    (Definition)
     > -// CHECK-NEXT:  attributes-cuda.cu:6:16: attribute(shared)
     > -// CHECK:       attributes-cuda.cu:7:28: FunctionDecl=f_host:7:28
     > -// CHECK-NEXT:  attributes-cuda.cu:7:16: attribute(host)
     > +// CHECK:       attributes-cuda.cu:5:30: FunctionDecl=f_device:5:30
     > +// CHECK-NEXT:  attributes-cuda.cu:5:16: attribute(device)
     > +// CHECK:       attributes-cuda.cu:6:30: FunctionDecl=f_global:6:30
     > +// CHECK-NEXT:  attributes-cuda.cu:6:16: attribute(global)
     > +// CHECK:       attributes-cuda.cu:7:32: VarDecl=g_constant:7:32
    (Definition)
     > +// CHECK-NEXT:  attributes-cuda.cu:7:16: attribute(constant)
     > +// CHECK:       attributes-cuda.cu:8:32: VarDecl=g_shared:8:32
    (Definition)
     > +// CHECK-NEXT:  attributes-cuda.cu:8:16: attribute(shared)
     > +// CHECK:       attributes-cuda.cu:9:28: FunctionDecl=f_host:9:28
     > +// CHECK-NEXT:  attributes-cuda.cu:9:16: attribute(host)
     >
     > Added: cfe/trunk/test/Index/index-file.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__index-2Dfile.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=Z26JivvnfsF4tloeePDLQLHxsFQrEclL-D6_alidUsQ&e=>
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/index-file.cu?rev=242085&view=auto
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_test_Index_index-2Dfile.cu-3Frev-3D242085-26view-3Dauto&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=KT-XgqFAI7Vl93vVOFFXhEywRf8qjxjSFX-IW8oVbs0&e=>
     >
    
==============================================================================
     > --- cfe/trunk/test/Index/index-file.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__index-2Dfile.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=Z26JivvnfsF4tloeePDLQLHxsFQrEclL-D6_alidUsQ&e=>
    (added)
     > +++ cfe/trunk/test/Index/index-file.cu
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__index-2Dfile.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=Z26JivvnfsF4tloeePDLQLHxsFQrEclL-D6_alidUsQ&e=>
    Mon Jul 13 18:27:56 2015
     > @@ -0,0 +1,9 @@
     > +// Make sure we can process CUDA file even if driver creates
    multiple jobs
     > +// RUN: c-index-test -test-load-source all %s | FileCheck %s
    -check-prefix=CHECK-ANY
     > +// Make sure we process correct side of cuda compilation
     > +// RUN: c-index-test -test-load-source all --cuda-host-only %s |
    FileCheck %s -check-prefix=CHECK-HOST
     > +// RUN: c-index-test -test-load-source all --cuda-device-only %s
    | FileCheck %s -check-prefix=CHECK-DEVICE
     > +
     > +// CHECK-ANY: macro definition=__cplusplus
     > +// CHECK-HOST-NOT: macro definition=__CUDA_ARCH__
     > +// CHECK-DEVICE: macro definition=__CUDA_ARCH__
     >
     > Modified: cfe/trunk/tools/libclang/CIndex.cpp
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/libclang/CIndex.cpp?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_tools_libclang_CIndex.cpp-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=kEIx9idn_Ke-DCJSORFu3jCYCUWk4PlZRvVP-ltWRl8&e=>
     >
    
==============================================================================
     > --- cfe/trunk/tools/libclang/CIndex.cpp (original)
     > +++ cfe/trunk/tools/libclang/CIndex.cpp Mon Jul 13 18:27:56 2015
     > @@ -3102,6 +3102,12 @@ static void clang_parseTranslationUnit_I
     >        /*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies,
     >        /*UserFilesAreVolatile=*/true, ForSerialization, &ErrUnit));
     >
     > +  // Early failures in LoadFromCommandLine may return with
    ErrUnit unset.
     > +  if (!Unit && !ErrUnit) {
     > +    PTUI->result = CXError_ASTReadError;
     > +    return;
     > +  }
     > +
     >    if (NumErrors != Diags->getClient()->getNumErrors()) {
     >      // Make sure to check that 'Unit' is non-NULL.
     >      if (CXXIdx->getDisplayDiagnostics())
     >
     > Modified: cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h
     > URL:
    
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h?rev=242085&r1=242084&r2=242085&view=diff
    
<https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_viewvc_llvm-2Dproject_cfe_trunk_unittests_ASTMatchers_ASTMatchersTest.h-3Frev-3D242085-26r1-3D242084-26r2-3D242085-26view-3Ddiff&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=I75miQAMK63tX32WH6iO2jq6asYQmsOGeyoe-6dPuuw&s=0VXYPHEwSel_CTLnuvdLSFw4gVs5G-ER5qz_NzOkvFM&e=>
     >
    
==============================================================================
     > --- cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h (original)
     > +++ cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h Mon Jul 13
    18:27:56 2015
     > @@ -164,6 +164,7 @@ testing::AssertionResult matchesConditio
     >    std::vector<std::string> Args;
     >    Args.push_back("-xcuda");
     >    Args.push_back("-fno-ms-extensions");
     > +  Args.push_back("--cuda-host-only");
     >    Args.push_back(CompileArg);
     >    if (!runToolOnCodeWithArgs(Factory->create(),
     >                               CudaHeader + Code, Args)) {
     >
     >
     > _______________________________________________
     > cfe-commits mailing list
     > cfe-commits@cs.uiuc.edu <mailto:cfe-commits@cs.uiuc.edu>
     > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
    _______________________________________________
    cfe-commits mailing list
    cfe-commits@cs.uiuc.edu <mailto:cfe-commits@cs.uiuc.edu>
    http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits



_______________________________________________
cfe-commits mailing list
cfe-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

--

Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation

_______________________________________________
cfe-commits mailing list
cfe-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Re: r242085 - [cuda] Driver changes to compile and stitch together host and device-side CUDA code.

Reply via email to