OK. Thanks. --Artem
On Mon, Jul 13, 2015 at 3:27 PM, Rafael EspĂndola < rafael.espind...@gmail.com> wrote: > I reverted this as it was failing on OS X: > > > http://lab.llvm.org:8080/green/job/clang-stage1-configure-RA_check/9068/console > > On 13 July 2015 at 13:21, Artem Belevich <t...@google.com> wrote: > > Author: tra > > Date: Mon Jul 13 15:21:06 2015 > > New Revision: 242058 > > > > URL: http://llvm.org/viewvc/llvm-project?rev=242058&view=rev > > Log: > > [cuda] Driver changes to compile and stitch together host and > device-side CUDA code. > > > > - Changed driver pipeline to compile host and device side of CUDA > > files and incorporate results of device-side compilation into host > > object file. > > > > - Added a test for cuda pipeline creation in clang driver. > > > > New clang options: > > --cuda-host-only - Do host-side compilation only. > > --cuda-device-only - Do device-side compilation only. > > > > --cuda-gpu-arch=<ARCH> - specify GPU architecture for device-side > > compilation. E.g. sm_35, sm_30. Default is sm_20. May be used more > > than once in which case one device-compilation will be done per > > unique specified GPU architecture. 
> > > > Differential Revision: http://reviews.llvm.org/D9509 > > > > Added: > > cfe/trunk/test/Driver/cuda-options.cu > > cfe/trunk/test/Index/index-file.cu > > Modified: > > cfe/trunk/include/clang/Driver/Action.h > > cfe/trunk/include/clang/Driver/Options.td > > cfe/trunk/include/clang/Driver/Types.def > > cfe/trunk/include/clang/Driver/Types.h > > cfe/trunk/lib/Driver/Action.cpp > > cfe/trunk/lib/Driver/Driver.cpp > > cfe/trunk/lib/Driver/ToolChain.cpp > > cfe/trunk/lib/Driver/ToolChains.cpp > > cfe/trunk/lib/Driver/ToolChains.h > > cfe/trunk/lib/Driver/Tools.cpp > > cfe/trunk/lib/Driver/Types.cpp > > cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp > > cfe/trunk/test/Index/attributes-cuda.cu > > cfe/trunk/tools/libclang/CIndex.cpp > > cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h > > > > Modified: cfe/trunk/include/clang/Driver/Action.h > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/include/clang/Driver/Action.h (original) > > +++ cfe/trunk/include/clang/Driver/Action.h Mon Jul 13 15:21:06 2015 > > @@ -41,6 +41,8 @@ public: > > enum ActionClass { > > InputClass = 0, > > BindArchClass, > > + CudaDeviceClass, > > + CudaHostClass, > > PreprocessJobClass, > > PrecompileJobClass, > > AnalyzeJobClass, > > @@ -133,6 +135,41 @@ public: > > } > > }; > > > > +class CudaDeviceAction : public Action { > > + virtual void anchor(); > > + /// GPU architecture to bind -- e.g 'sm_35'. > > + const char *GpuArchName; > > + /// True when action results are not consumed by the host action (e.g > when > > + /// -fsyntax-only or --cuda-device-only options are used). 
> > + bool AtTopLevel; > > + > > +public: > > + CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName, > > + bool AtTopLevel); > > + > > + const char *getGpuArchName() const { return GpuArchName; } > > + bool isAtTopLevel() const { return AtTopLevel; } > > + > > + static bool classof(const Action *A) { > > + return A->getKind() == CudaDeviceClass; > > + } > > +}; > > + > > +class CudaHostAction : public Action { > > + virtual void anchor(); > > + ActionList DeviceActions; > > + > > +public: > > + CudaHostAction(std::unique_ptr<Action> Input, > > + const ActionList &DeviceActions); > > + ~CudaHostAction() override; > > + > > + ActionList &getDeviceActions() { return DeviceActions; } > > + const ActionList &getDeviceActions() const { return DeviceActions; } > > + > > + static bool classof(const Action *A) { return A->getKind() == > CudaHostClass; } > > +}; > > + > > class JobAction : public Action { > > virtual void anchor(); > > protected: > > > > Modified: cfe/trunk/include/clang/Driver/Options.td > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/include/clang/Driver/Options.td (original) > > +++ cfe/trunk/include/clang/Driver/Options.td Mon Jul 13 15:21:06 2015 > > @@ -351,6 +351,12 @@ def cxx_isystem : JoinedOrSeparate<["-"] > > MetaVarName<"<directory>">; > > def c : Flag<["-"], "c">, Flags<[DriverOption]>, > > HelpText<"Only run preprocess, compile, and assemble steps">; > > +def cuda_device_only : Flag<["--"], "cuda-device-only">, > > + HelpText<"Do device-side CUDA compilation only">; > > +def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, > > + Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU architecture">; > > +def cuda_host_only : Flag<["--"], "cuda-host-only">, > > + HelpText<"Do host-side CUDA compilation only">; > > def dA : Flag<["-"], "dA">, 
Group<d_Group>; > > def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>, > > HelpText<"Print macro definitions in -E mode in addition to normal > output">; > > > > Modified: cfe/trunk/include/clang/Driver/Types.def > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.def?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/include/clang/Driver/Types.def (original) > > +++ cfe/trunk/include/clang/Driver/Types.def Mon Jul 13 15:21:06 2015 > > @@ -44,6 +44,7 @@ TYPE("c", C, > > TYPE("cl", CL, PP_C, "cl", > "u") > > TYPE("cuda-cpp-output", PP_CUDA, INVALID, "cui", > "u") > > TYPE("cuda", CUDA, PP_CUDA, "cu", > "u") > > +TYPE("cuda", CUDA_DEVICE, PP_CUDA, "cu", > "") > > TYPE("objective-c-cpp-output", PP_ObjC, INVALID, "mi", > "u") > > TYPE("objc-cpp-output", PP_ObjC_Alias, INVALID, "mi", > "u") > > TYPE("objective-c", ObjC, PP_ObjC, "m", > "u") > > > > Modified: cfe/trunk/include/clang/Driver/Types.h > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.h?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/include/clang/Driver/Types.h (original) > > +++ cfe/trunk/include/clang/Driver/Types.h Mon Jul 13 15:21:06 2015 > > @@ -63,6 +63,9 @@ namespace types { > > /// isCXX - Is this a "C++" input (C++ and Obj-C++ sources and > headers). > > bool isCXX(ID Id); > > > > + /// isCuda - Is this a CUDA input. > > + bool isCuda(ID Id); > > + > > /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and > headers). 
> > bool isObjC(ID Id); > > > > > > Modified: cfe/trunk/lib/Driver/Action.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/Action.cpp (original) > > +++ cfe/trunk/lib/Driver/Action.cpp Mon Jul 13 15:21:06 2015 > > @@ -24,6 +24,8 @@ const char *Action::getClassName(ActionC > > switch (AC) { > > case InputClass: return "input"; > > case BindArchClass: return "bind-arch"; > > + case CudaDeviceClass: return "cuda-device"; > > + case CudaHostClass: return "cuda-host"; > > case PreprocessJobClass: return "preprocessor"; > > case PrecompileJobClass: return "precompiler"; > > case AnalyzeJobClass: return "analyzer"; > > @@ -53,6 +55,25 @@ BindArchAction::BindArchAction(std::uniq > > const char *_ArchName) > > : Action(BindArchClass, std::move(Input)), ArchName(_ArchName) {} > > > > +void CudaDeviceAction::anchor() {} > > + > > +CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input, > > + const char *ArchName, bool > AtTopLevel) > > + : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName), > > + AtTopLevel(AtTopLevel) {} > > + > > +void CudaHostAction::anchor() {} > > + > > +CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input, > > + const ActionList &_DeviceActions) > > + : Action(CudaHostClass, std::move(Input)), > DeviceActions(_DeviceActions) {} > > + > > +CudaHostAction::~CudaHostAction() { > > + for (iterator it = DeviceActions.begin(), ie = DeviceActions.end(); > it != ie; > > + ++it) > > + delete *it; > > +} > > + > > void JobAction::anchor() {} > > > > JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input, > > > > Modified: cfe/trunk/lib/Driver/Driver.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > 
============================================================================== > > --- cfe/trunk/lib/Driver/Driver.cpp (original) > > +++ cfe/trunk/lib/Driver/Driver.cpp Mon Jul 13 15:21:06 2015 > > @@ -174,8 +174,10 @@ phases::ID Driver::getFinalPhase(const D > > } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) { > > FinalPhase = phases::Backend; > > > > - // -c only runs up to the assembler. > > - } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) { > > + // -c and partial CUDA compilations only run up to the assembler. > > + } else if ((PhaseArg = DAL.getLastArg(options::OPT_c)) || > > + (PhaseArg = DAL.getLastArg(options::OPT_cuda_device_only)) > || > > + (PhaseArg = DAL.getLastArg(options::OPT_cuda_host_only))) { > > FinalPhase = phases::Assemble; > > > > // Otherwise do everything. > > @@ -900,9 +902,20 @@ static unsigned PrintActions1(const Comp > > } else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) { > > os << '"' << BIA->getArchName() << '"' << ", {" > > << PrintActions1(C, *BIA->begin(), Ids) << "}"; > > + } else if (CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { > > + os << '"' << CDA->getGpuArchName() << '"' << ", {" > > + << PrintActions1(C, *CDA->begin(), Ids) << "}"; > > } else { > > + ActionList *AL; > > + if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { > > + os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}" > > + << ", gpu binaries "; > > + AL = &CHA->getDeviceActions(); > > + } else > > + AL = &A->getInputs(); > > + > > const char *Prefix = "{"; > > - for (Action *PreRequisite : *A) { > > + for (Action *PreRequisite : *AL) { > > os << Prefix << PrintActions1(C, PreRequisite, Ids); > > Prefix = ", "; > > } > > @@ -1215,6 +1228,93 @@ void Driver::BuildInputs(const ToolChain > > } > > } > > > > +// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE > input > > +// action and then wraps each in CudaDeviceAction paired with > appropriate GPU > > +// arch name. 
If we're only building device-side code, each action > remains > > +// independent. Otherwise we pass device-side actions as inputs to a new > > +// CudaHostAction which combines both host and device side actions. > > +static std::unique_ptr<Action> > > +buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList > &Args, > > + const Arg *InputArg, const types::ID InputType, > > + std::unique_ptr<Action> Current, ActionList &Actions) { > > + > > + assert(InputType == types::TY_CUDA && > > + "CUDA Actions only apply to CUDA inputs."); > > + > > + // Collect all cuda_gpu_arch parameters, removing duplicates. > > + SmallVector<const char *, 4> GpuArchList; > > + llvm::StringSet<> GpuArchNames; > > + for (Arg *A : Args) { > > + if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) { > > + A->claim(); > > + if (GpuArchNames.insert(A->getValue()).second) > > + GpuArchList.push_back(A->getValue()); > > + } > > + } > > + > > + // Default to sm_20 which is the lowest common denominator for > supported GPUs. > > + // sm_20 code should work correctly, if suboptimally, on all newer > GPUs. > > + if (GpuArchList.empty()) > > + GpuArchList.push_back("sm_20"); > > + > > + // Replicate inputs for each GPU architecture. > > + Driver::InputList CudaDeviceInputs; > > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > > + CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, > InputArg)); > > + > > + // Build actions for all device inputs. > > + ActionList CudaDeviceActions; > > + D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions); > > + assert(GpuArchList.size() == CudaDeviceActions.size() && > > + "Failed to create actions for all devices"); > > + > > + // Check whether any of device actions stopped before they could > generate PTX. 
> > + bool PartialCompilation = false; > > + bool DeviceOnlyCompilation = > Args.hasArg(options::OPT_cuda_device_only); > > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) { > > + if (CudaDeviceActions[i]->getKind() != Action::BackendJobClass) { > > + PartialCompilation = true; > > + break; > > + } > > + } > > + > > + // Figure out what to do with device actions -- pass them as inputs > to the > > + // host action or run each of them independently. > > + if (PartialCompilation || DeviceOnlyCompilation) { > > + // In case of partial or device-only compilation results of device > actions > > + // are not consumed by the host action device actions have to be > added to > > + // top-level actions list with AtTopLevel=true and run > independently. > > + > > + // -o is ambiguous if we have more than one top-level action. > > + if (Args.hasArg(options::OPT_o) && > > + (!DeviceOnlyCompilation || GpuArchList.size() > 1)) { > > + D.Diag(clang::diag::err_drv_output_argument_with_multiple_files); > > + return nullptr; > > + } > > + > > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > > + Actions.push_back( > > + new > CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]), > > + GpuArchList[i], /* AtTopLevel */ true)); > > + // Kill host action in case of device-only compilation. > > + if (DeviceOnlyCompilation) > > + Current.reset(nullptr); > > + return Current; > > + } else { > > + // Outputs of device actions during complete CUDA compilation get > created > > + // with AtTopLevel=false and become inputs for the host action. > > + ActionList DeviceActions; > > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > > + DeviceActions.push_back( > > + new > CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]), > > + GpuArchList[i], /* AtTopLevel */ false)); > > + // Return a new host action that incorporates original host action > and all > > + // device actions. 
> > + return std::unique_ptr<Action>( > > + new CudaHostAction(std::move(Current), DeviceActions)); > > + } > > +} > > + > > void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, > > const InputList &Inputs, ActionList &Actions) > const { > > llvm::PrettyStackTraceString CrashInfo("Building compilation > actions"); > > @@ -1312,6 +1412,25 @@ void Driver::BuildActions(const ToolChai > > continue; > > } > > > > + phases::ID CudaInjectionPhase; > > + if (isSaveTempsEnabled()) { > > + // All phases are done independently, inject GPU blobs during > compilation > > + // phase as that's where we generate glue code to init them. > > + CudaInjectionPhase = phases::Compile; > > + } else { > > + // Assumes that clang does everything up until linking phase, so > we inject > > + // cuda device actions at the last step before linking. Otherwise > CUDA > > + // host action forces preprocessor into a separate invocation. > > + if (FinalPhase == phases::Link) { > > + for (auto i = PL.begin(), e = PL.end(); i != e; ++i) { > > + auto next = i + 1; > > + if (next != e && *next == phases::Link) > > + CudaInjectionPhase = *i; > > + } > > + } else > > + CudaInjectionPhase = FinalPhase; > > + } > > + > > // Build the pipeline for this file. > > std::unique_ptr<Action> Current(new InputAction(*InputArg, > InputType)); > > for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = > PL.end(); > > @@ -1337,6 +1456,15 @@ void Driver::BuildActions(const ToolChai > > > > // Otherwise construct the appropriate action. 
> > Current = ConstructPhaseAction(TC, Args, Phase, > std::move(Current)); > > + > > + if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase && > > + !Args.hasArg(options::OPT_cuda_host_only)) { > > + Current = buildCudaActions(*this, TC, Args, InputArg, InputType, > > + std::move(Current), Actions); > > + if (!Current) > > + break; > > + } > > + > > if (Current->getType() == types::TY_Nothing) > > break; > > } > > @@ -1576,7 +1704,13 @@ static const Tool *SelectToolForJob(Comp > > if (isa<BackendJobAction>(JA)) { > > // Check if the compiler supports emitting LLVM IR. > > assert(Inputs->size() == 1); > > - JobAction *CompileJA = cast<CompileJobAction>(*Inputs->begin()); > > + JobAction *CompileJA; > > + // Extract real host action, if it's a CudaHostAction. > > + if (CudaHostAction *CudaHA = > dyn_cast<CudaHostAction>(*Inputs->begin())) > > + CompileJA = cast<CompileJobAction>(*CudaHA->begin()); > > + else > > + CompileJA = cast<CompileJobAction>(*Inputs->begin()); > > + > > const Tool *Compiler = TC->SelectTool(*CompileJA); > > if (!Compiler) > > return nullptr; > > @@ -1610,6 +1744,20 @@ void Driver::BuildJobsForAction(Compilat > > InputInfo &Result) const { > > llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); > > > > + InputInfoList CudaDeviceInputInfos; > > + if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { > > + InputInfo II; > > + // Append outputs of device jobs to the input list. > > + for (const Action *DA : CHA->getDeviceActions()) { > > + BuildJobsForAction(C, DA, TC, "", AtTopLevel, > > + /*MultipleArchs*/ false, LinkingOutput, II); > > + CudaDeviceInputInfos.push_back(II); > > + } > > + // Override current action with a real host compile action and > continue > > + // processing it. > > + A = *CHA->begin(); > > + } > > + > > if (const InputAction *IA = dyn_cast<InputAction>(A)) { > > // FIXME: It would be nice to not claim this here; maybe the old > scheme of > > // just using Args was better? 
> > @@ -1635,11 +1783,24 @@ void Driver::BuildJobsForAction(Compilat > > else > > TC = &C.getDefaultToolChain(); > > > > - BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(), > AtTopLevel, > > + BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, > > MultipleArchs, LinkingOutput, Result); > > return; > > } > > > > + if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { > > + // Figure out which NVPTX triple to use for device-side compilation > based on > > + // whether host is 64-bit. > > + llvm::Triple > DeviceTriple(C.getDefaultToolChain().getTriple().isArch64Bit() > > + ? "nvptx64-nvidia-cuda" > > + : "nvptx-nvidia-cuda"); > > + BuildJobsForAction(C, *CDA->begin(), > > + &getToolChain(C.getArgs(), DeviceTriple), > > + CDA->getGpuArchName(), CDA->isAtTopLevel(), > > + /*MultipleArchs*/ true, LinkingOutput, Result); > > + return; > > + } > > + > > const ActionList *Inputs = &A->getInputs(); > > > > const JobAction *JA = cast<JobAction>(A); > > @@ -1671,6 +1832,10 @@ void Driver::BuildJobsForAction(Compilat > > if (JA->getType() == types::TY_dSYM) > > BaseInput = InputInfos[0].getFilename(); > > > > + // Append outputs of cuda device jobs to the input list > > + if (CudaDeviceInputInfos.size()) > > + InputInfos.append(CudaDeviceInputInfos.begin(), > CudaDeviceInputInfos.end()); > > + > > // Determine the place to write output to, if any. > > if (JA->getType() == types::TY_Nothing) > > Result = InputInfo(A->getType(), BaseInput); > > @@ -2052,6 +2217,9 @@ const ToolChain &Driver::getToolChain(co > > break; > > } > > break; > > + case llvm::Triple::CUDA: > > + TC = new toolchains::CudaToolChain(*this, Target, Args); > > + break; > > default: > > // Of these targets, Hexagon is the only one that might have > > // an OS of Linux, in which case it got handled above already. 
> > > > Modified: cfe/trunk/lib/Driver/ToolChain.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChain.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/ToolChain.cpp (original) > > +++ cfe/trunk/lib/Driver/ToolChain.cpp Mon Jul 13 15:21:06 2015 > > @@ -151,6 +151,8 @@ Tool *ToolChain::getTool(Action::ActionC > > > > case Action::InputClass: > > case Action::BindArchClass: > > + case Action::CudaDeviceClass: > > + case Action::CudaHostClass: > > case Action::LipoJobClass: > > case Action::DsymutilJobClass: > > case Action::VerifyDebugInfoJobClass: > > > > Modified: cfe/trunk/lib/Driver/ToolChains.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/ToolChains.cpp (original) > > +++ cfe/trunk/lib/Driver/ToolChains.cpp Mon Jul 13 15:21:06 2015 > > @@ -3652,6 +3652,65 @@ Tool *DragonFly::buildLinker() const { > > return new tools::dragonfly::Linker(*this); > > } > > > > +/// Stub for CUDA toolchain. At the moment we don't have assembler or > > +/// linker and need toolchain mainly to propagate device-side options > > +/// to CC1. 
> > + > > +CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple > &Triple, > > + const ArgList &Args) > > + : Linux(D, Triple, Args) {} > > + > > +void > > +CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList > &DriverArgs, > > + llvm::opt::ArgStringList &CC1Args) > const { > > + Linux::addClangTargetOptions(DriverArgs, CC1Args); > > + CC1Args.push_back("-fcuda-is-device"); > > +} > > + > > +llvm::opt::DerivedArgList * > > +CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, > > + const char *BoundArch) const { > > + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); > > + const OptTable &Opts = getDriver().getOpts(); > > + > > + for (Arg *A : Args) { > > + if (A->getOption().matches(options::OPT_Xarch__)) { > > + // Skip this argument unless the architecture matches BoundArch > > + if (A->getValue(0) != StringRef(BoundArch)) > > + continue; > > + > > + unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1)); > > + unsigned Prev = Index; > > + std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index)); > > + > > + // If the argument parsing failed or more than one argument was > > + // consumed, the -Xarch_ argument's parameter tried to consume > > + // extra arguments. Emit an error and ignore. > > + // > > + // We also want to disallow any options which would alter the > > + // driver behavior; that isn't going to work in our model. We > > + // use isDriverOption() as an approximation, although things > > + // like -O4 are going to slip through. 
> > + if (!XarchArg || Index > Prev + 1) { > > + getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args) > > + << A->getAsString(Args); > > + continue; > > + } else if (XarchArg->getOption().hasFlag(options::DriverOption)) { > > + getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver) > > + << A->getAsString(Args); > > + continue; > > + } > > + XarchArg->setBaseArg(A); > > + A = XarchArg.release(); > > + DAL->AddSynthesizedArg(A); > > + } > > + DAL->append(A); > > + } > > + > > + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), > BoundArch); > > + return DAL; > > +} > > + > > /// XCore tool chain > > XCore::XCore(const Driver &D, const llvm::Triple &Triple, const ArgList > &Args) > > : ToolChain(D, Triple, Args) { > > > > Modified: cfe/trunk/lib/Driver/ToolChains.h > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains.h?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/ToolChains.h (original) > > +++ cfe/trunk/lib/Driver/ToolChains.h Mon Jul 13 15:21:06 2015 > > @@ -699,6 +699,18 @@ private: > > std::string computeSysRoot() const; > > }; > > > > +class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux { > > +public: > > + CudaToolChain(const Driver &D, const llvm::Triple &Triple, > > + const llvm::opt::ArgList &Args); > > + > > + llvm::opt::DerivedArgList * > > + TranslateArgs(const llvm::opt::DerivedArgList &Args, > > + const char *BoundArch) const override; > > + void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, > > + llvm::opt::ArgStringList &CC1Args) const > override; > > +}; > > + > > class LLVM_LIBRARY_VISIBILITY Hexagon_TC : public Linux { > > protected: > > GCCVersion GCCLibAndIncVersion; > > > > Modified: cfe/trunk/lib/Driver/Tools.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > 
============================================================================== > > --- cfe/trunk/lib/Driver/Tools.cpp (original) > > +++ cfe/trunk/lib/Driver/Tools.cpp Mon Jul 13 15:21:06 2015 > > @@ -1488,6 +1488,12 @@ static std::string getCPUName(const ArgL > > return CPUName; > > } > > > > + case llvm::Triple::nvptx: > > + case llvm::Triple::nvptx64: > > + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) > > + return A->getValue(); > > + return ""; > > + > > case llvm::Triple::ppc: > > case llvm::Triple::ppc64: > > case llvm::Triple::ppc64le: { > > @@ -2826,8 +2832,14 @@ void Clang::ConstructJob(Compilation &C, > > getToolChain().getTriple().isWindowsCygwinEnvironment(); > > bool IsWindowsMSVC = > getToolChain().getTriple().isWindowsMSVCEnvironment(); > > > > - assert(Inputs.size() == 1 && "Unable to handle multiple inputs."); > > + // Check number of inputs for sanity. We need at least one input. > > + assert(Inputs.size() >= 1 && "Must have at least one input."); > > const InputInfo &Input = Inputs[0]; > > + // CUDA compilation may have multiple inputs (source file + results of > > + // device-side compilations). All other jobs are expected to have > exactly one > > + // input. > > + bool IsCuda = types::isCuda(Input.getType()); > > + assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple > inputs."); > > > > // Invoke ourselves in -cc1 mode. 
> > // > > @@ -4802,14 +4814,12 @@ void Clang::ConstructJob(Compilation &C, > > assert(Output.isNothing() && "Invalid output."); > > } > > > > - for (const auto &II : Inputs) { > > - addDashXForInput(Args, II, CmdArgs); > > + addDashXForInput(Args, Input, CmdArgs); > > > > - if (II.isFilename()) > > - CmdArgs.push_back(II.getFilename()); > > - else > > - II.getInputArg().renderAsInput(Args, CmdArgs); > > - } > > + if (Input.isFilename()) > > + CmdArgs.push_back(Input.getFilename()); > > + else > > + Input.getInputArg().renderAsInput(Args, CmdArgs); > > > > Args.AddAllArgs(CmdArgs, options::OPT_undef); > > > > @@ -4847,6 +4857,16 @@ void Clang::ConstructJob(Compilation &C, > > CmdArgs.push_back(SplitDwarfOut); > > } > > > > + // Host-side cuda compilation receives device-side outputs as > Inputs[1...]. > > + // Include them with -fcuda-include-gpubinary. > > + if (IsCuda && Inputs.size() > 1) > > + for (InputInfoList::const_iterator it = std::next(Inputs.begin()), > > + ie = Inputs.end(); > > + it != ie; ++it) { > > + CmdArgs.push_back("-fcuda-include-gpubinary"); > > + CmdArgs.push_back(it->getFilename()); > > + } > > + > > // Finally add the compile command to the compilation. 
> > if (Args.hasArg(options::OPT__SLASH_fallback) && > > Output.getType() == types::TY_Object && > > > > Modified: cfe/trunk/lib/Driver/Types.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Types.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/Types.cpp (original) > > +++ cfe/trunk/lib/Driver/Types.cpp Mon Jul 13 15:21:06 2015 > > @@ -86,6 +86,7 @@ bool types::isAcceptedByClang(ID Id) { > > case TY_C: case TY_PP_C: > > case TY_CL: > > case TY_CUDA: case TY_PP_CUDA: > > + case TY_CUDA_DEVICE: > > case TY_ObjC: case TY_PP_ObjC: case TY_PP_ObjC_Alias: > > case TY_CXX: case TY_PP_CXX: > > case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias: > > @@ -122,7 +123,19 @@ bool types::isCXX(ID Id) { > > case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias: > > case TY_CXXHeader: case TY_PP_CXXHeader: > > case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: > > - case TY_CUDA: case TY_PP_CUDA: > > + case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: > > + return true; > > + } > > +} > > + > > +bool types::isCuda(ID Id) { > > + switch (Id) { > > + default: > > + return false; > > + > > + case TY_CUDA: > > + case TY_PP_CUDA: > > + case TY_CUDA_DEVICE: > > return true; > > } > > } > > @@ -206,10 +219,12 @@ void types::getCompilationPhases(ID Id, > > P.push_back(phases::Compile); > > P.push_back(phases::Backend); > > } > > - P.push_back(phases::Assemble); > > + if (Id != TY_CUDA_DEVICE) > > + P.push_back(phases::Assemble); > > } > > } > > - if (!onlyPrecompileType(Id)) { > > + > > + if (!onlyPrecompileType(Id) && Id != TY_CUDA_DEVICE) { > > P.push_back(phases::Link); > > } > > assert(0 < P.size() && "Not enough phases in list"); > > > > Modified: cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp > > URL: > 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp (original) > > +++ cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp Mon Jul > 13 15:21:06 2015 > > @@ -15,6 +15,7 @@ > > #include "clang/Basic/DiagnosticOptions.h" > > #include "clang/Driver/Compilation.h" > > #include "clang/Driver/Driver.h" > > +#include "clang/Driver/Action.h" > > #include "clang/Driver/Options.h" > > #include "clang/Driver/Tool.h" > > #include "clang/Frontend/CompilerInstance.h" > > @@ -61,9 +62,21 @@ clang::createInvocationFromCommandLine(A > > } > > > > // We expect to get back exactly one command job, if we didn't > something > > - // failed. > > + // failed. CUDA compilation is an exception as it creates multiple > jobs. If > > + // that's the case, we proceed with the first job. If caller needs > particular > > + // CUDA job, it should be controlled via --cuda-{host|device}-only > option > > + // passed to the driver. 
> > const driver::JobList &Jobs = C->getJobs(); > > - if (Jobs.size() != 1 || !isa<driver::Command>(*Jobs.begin())) { > > + bool CudaCompilation = false; > > + if (Jobs.size() > 1) { > > + for (auto &A : C->getActions()) > > + if (isa<driver::CudaDeviceAction>(A)) { > > + CudaCompilation = true; > > + break; > > + } > > + } > > + if (Jobs.size() == 0 || !isa<driver::Command>(*Jobs.begin()) || > > + (Jobs.size() > 1 && !CudaCompilation)) { > > SmallString<256> Msg; > > llvm::raw_svector_ostream OS(Msg); > > Jobs.Print(OS, "; ", true); > > > > Added: cfe/trunk/test/Driver/cuda-options.cu > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-options.cu?rev=242058&view=auto > > > ============================================================================== > > --- cfe/trunk/test/Driver/cuda-options.cu (added) > > +++ cfe/trunk/test/Driver/cuda-options.cu Mon Jul 13 15:21:06 2015 > > @@ -0,0 +1,108 @@ > > +// Tests CUDA compilation pipeline construction in Driver. > > + > > +// Simple compilation case: > > +// RUN: %clang -### -c %s 2>&1 \ > > +// Compile device-side to PTX assembly and make sure we use it on the > host side. > > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > > +// Then compile host side and incorporate device code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Typical compilation + link case: > > +// RUN: %clang -### %s 2>&1 \ > > +// Compile device-side to PTX assembly and make sure we use it on the > host side > > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > > +// Then compile host side and incorporate device code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > > +// Then link things. 
> > +// RUN: -check-prefix CUDA-L %s > > + > > +// Verify that -cuda-no-device disables device-side compilation and > linking > > +// RUN: %clang -### --cuda-host-only %s 2>&1 \ > > +// Make sure we didn't run device-side compilation. > > +// RUN: | FileCheck -check-prefix CUDA-ND \ > > +// Then compile host side and make sure we don't attempt to incorporate > GPU code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-NI \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Verify that -cuda-no-host disables host-side compilation and linking > > +// RUN: %clang -### --cuda-device-only %s 2>&1 \ > > +// Compile device-side to PTX assembly > > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > > +// Make sure there are no host cmpilation or linking. > > +// RUN: -check-prefix CUDA-NH -check-prefix CUDA-NL %s > > + > > +// Verify that with -S we compile host and device sides to assembly > > +// and incorporate device code on the host side. > > +// RUN: %clang -### -S -c %s 2>&1 \ > > +// Compile device-side to PTX assembly > > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > > +// Then compile host side and incorporate GPU code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Verify that --cuda-gpu-arch option passes correct GPU > > +// archtecture info to device compilation. > > +// RUN: %clang -### --cuda-gpu-arch=sm_35 -c %s 2>&1 \ > > +// Compile device-side to PTX assembly. > > +// RUN: | FileCheck -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \ > > +// Then compile host side and incorporate GPU code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Verify that there is device-side compilation per --cuda-gpu-arch args > > +// and that all results are included on the host side. 
> > +// RUN: %clang -### --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s > 2>&1 \ > > +// Compile both device-sides to PTX assembly > > +// RUN: | FileCheck \ > > +// RUN: -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \ > > +// RUN: -check-prefix CUDA-D2 -check-prefix CUDA-D2-SM30 \ > > +// Then compile host side and incorporate both device-side outputs > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 -check-prefix > CUDA-H-I2 \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Match device-side compilation > > +// CUDA-D1: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > > +// CUDA-D1-SAME: "-fcuda-is-device" > > +// CUDA-D1-SM35-SAME: "-target-cpu" "sm_35" > > +// CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]" > > +// CUDA-D1-SAME: "-x" "cuda" > > + > > +// Match another device-side compilation > > +// CUDA-D2: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > > +// CUDA-D2-SAME: "-fcuda-is-device" > > +// CUDA-D2-SM30-SAME: "-target-cpu" "sm_30" > > +// CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]" > > +// CUDA-D2-SAME: "-x" "cuda" > > + > > +// Match no device-side compilation > > +// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > > +// CUDA-ND-SAME-NOT: "-fcuda-is-device" > > + > > +// Match host-side compilation > > +// CUDA-H: "-cc1" "-triple" > > +// CUDA-H-SAME-NOT: "nvptx{{64?}}-nvidia-cuda" > > +// CUDA-H-SAME-NOT: "-fcuda-is-device" > > +// CUDA-H-SAME: "-o" "[[HOSTOBJ:[^"]*]]" > > +// CUDA-H-SAME: "-x" "cuda" > > +// CUDA-H-I1-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY1]]" > > +// CUDA-H-I2-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY2]]" > > + > > +// Match no GPU code inclusion. 
> > +// CUDA-H-NI-NOT: "-fcuda-include-gpubinary" > > + > > +// Match no CUDA compilation > > +// CUDA-NH-NOT: "-cc1" "-triple" > > +// CUDA-NH-SAME-NOT: "-x" "cuda" > > + > > +// Match linker > > +// CUDA-L: "{{.*}}ld{{(.exe)?}}" > > +// CUDA-L-SAME: "[[HOSTOBJ]]" > > + > > +// Match no linker > > +// CUDA-NL-NOT: "{{.*}}ld{{(.exe)?}}" > > > > Modified: cfe/trunk/test/Index/attributes-cuda.cu > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/attributes-cuda.cu?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/test/Index/attributes-cuda.cu (original) > > +++ cfe/trunk/test/Index/attributes-cuda.cu Mon Jul 13 15:21:06 2015 > > @@ -1,4 +1,6 @@ > > // RUN: c-index-test -test-load-source all -x cuda %s | FileCheck %s > > +// RUN: c-index-test -test-load-source all -x cuda --cuda-host-only %s > | FileCheck %s > > +// RUN: c-index-test -test-load-source all -x cuda --cuda-device-only > %s | FileCheck %s > > > > __attribute__((device)) void f_device(); > > __attribute__((global)) void f_global(); > > @@ -6,13 +8,13 @@ __attribute__((constant)) int* g_constan > > __attribute__((shared)) float *g_shared; > > __attribute__((host)) void f_host(); > > > > -// CHECK: attributes-cuda.cu:3:30: FunctionDecl=f_device:3:30 > > -// CHECK-NEXT: attributes-cuda.cu:3:16: attribute(device) > > -// CHECK: attributes-cuda.cu:4:30: FunctionDecl=f_global:4:30 > > -// CHECK-NEXT: attributes-cuda.cu:4:16: attribute(global) > > -// CHECK: attributes-cuda.cu:5:32: VarDecl=g_constant:5:32 > (Definition) > > -// CHECK-NEXT: attributes-cuda.cu:5:16: attribute(constant) > > -// CHECK: attributes-cuda.cu:6:32: VarDecl=g_shared:6:32 > (Definition) > > -// CHECK-NEXT: attributes-cuda.cu:6:16: attribute(shared) > > -// CHECK: attributes-cuda.cu:7:28: FunctionDecl=f_host:7:28 > > -// CHECK-NEXT: attributes-cuda.cu:7:16: attribute(host) > > +// CHECK: attributes-cuda.cu:5:30: 
FunctionDecl=f_device:5:30 > > +// CHECK-NEXT: attributes-cuda.cu:5:16: attribute(device) > > +// CHECK: attributes-cuda.cu:6:30: FunctionDecl=f_global:6:30 > > +// CHECK-NEXT: attributes-cuda.cu:6:16: attribute(global) > > +// CHECK: attributes-cuda.cu:7:32: VarDecl=g_constant:7:32 > (Definition) > > +// CHECK-NEXT: attributes-cuda.cu:7:16: attribute(constant) > > +// CHECK: attributes-cuda.cu:8:32: VarDecl=g_shared:8:32 > (Definition) > > +// CHECK-NEXT: attributes-cuda.cu:8:16: attribute(shared) > > +// CHECK: attributes-cuda.cu:9:28: FunctionDecl=f_host:9:28 > > +// CHECK-NEXT: attributes-cuda.cu:9:16: attribute(host) > > > > Added: cfe/trunk/test/Index/index-file.cu > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/index-file.cu?rev=242058&view=auto > > > ============================================================================== > > --- cfe/trunk/test/Index/index-file.cu (added) > > +++ cfe/trunk/test/Index/index-file.cu Mon Jul 13 15:21:06 2015 > > @@ -0,0 +1,9 @@ > > +// Make sure we can process CUDA file even if driver creates multiple > jobs > > +// RUN: c-index-test -test-load-source all %s | FileCheck %s > -check-prefix=CHECK-ANY > > +// Make sure we process correct side of cuda compilation > > +// RUN: c-index-test -test-load-source all --cuda-host-only %s | > FileCheck %s -check-prefix=CHECK-HOST > > +// RUN: c-index-test -test-load-source all --cuda-device-only %s | > FileCheck %s -check-prefix=CHECK-DEVICE > > + > > +// CHECK-ANY: macro definition=__cplusplus > > +// CHECK-HOST-NOT: macro definition=__CUDA_ARCH__ > > +// CHECK-DEVICE: macro definition=__CUDA_ARCH__ > > > > Modified: cfe/trunk/tools/libclang/CIndex.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/libclang/CIndex.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/tools/libclang/CIndex.cpp (original) > > +++ cfe/trunk/tools/libclang/CIndex.cpp Mon Jul 
13 15:21:06 2015 > > @@ -3102,6 +3102,12 @@ static void clang_parseTranslationUnit_I > > /*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies, > > /*UserFilesAreVolatile=*/true, ForSerialization, &ErrUnit)); > > > > + // Early failures in LoadFromCommandLine may return with ErrUnit > unset. > > + if (!Unit && !ErrUnit) { > > + PTUI->result = CXError_ASTReadError; > > + return; > > + } > > + > > if (NumErrors != Diags->getClient()->getNumErrors()) { > > // Make sure to check that 'Unit' is non-NULL. > > if (CXXIdx->getDisplayDiagnostics()) > > > > Modified: cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h (original) > > +++ cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h Mon Jul 13 > 15:21:06 2015 > > @@ -164,6 +164,7 @@ testing::AssertionResult matchesConditio > > std::vector<std::string> Args; > > Args.push_back("-xcuda"); > > Args.push_back("-fno-ms-extensions"); > > + Args.push_back("--cuda-host-only"); > > Args.push_back(CompileArg); > > if (!runToolOnCodeWithArgs(Factory->create(), > > CudaHeader + Code, Args)) { > > > > > > _______________________________________________ > > cfe-commits mailing list > > cfe-commits@cs.uiuc.edu > > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits > -- --Artem Belevich
_______________________________________________ cfe-commits mailing list cfe-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits