I reverted this commit because it was failing on OS X; see the build console log: http://lab.llvm.org:8080/green/job/clang-stage1-configure-RA_check/9068/console
On 13 July 2015 at 13:21, Artem Belevich <t...@google.com> wrote: > Author: tra > Date: Mon Jul 13 15:21:06 2015 > New Revision: 242058 > > URL: http://llvm.org/viewvc/llvm-project?rev=242058&view=rev > Log: > [cuda] Driver changes to compile and stitch together host and device-side > CUDA code. > > - Changed driver pipeline to compile host and device side of CUDA > files and incorporate results of device-side compilation into host > object file. > > - Added a test for cuda pipeline creation in clang driver. > > New clang options: > --cuda-host-only - Do host-side compilation only. > --cuda-device-only - Do device-side compilation only. > > --cuda-gpu-arch=<ARCH> - specify GPU architecture for device-side > compilation. E.g. sm_35, sm_30. Default is sm_20. May be used more > than once in which case one device-compilation will be done per > unique specified GPU architecture. > > Differential Revision: http://reviews.llvm.org/D9509 > > Added: > cfe/trunk/test/Driver/cuda-options.cu > cfe/trunk/test/Index/index-file.cu > Modified: > cfe/trunk/include/clang/Driver/Action.h > cfe/trunk/include/clang/Driver/Options.td > cfe/trunk/include/clang/Driver/Types.def > cfe/trunk/include/clang/Driver/Types.h > cfe/trunk/lib/Driver/Action.cpp > cfe/trunk/lib/Driver/Driver.cpp > cfe/trunk/lib/Driver/ToolChain.cpp > cfe/trunk/lib/Driver/ToolChains.cpp > cfe/trunk/lib/Driver/ToolChains.h > cfe/trunk/lib/Driver/Tools.cpp > cfe/trunk/lib/Driver/Types.cpp > cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp > cfe/trunk/test/Index/attributes-cuda.cu > cfe/trunk/tools/libclang/CIndex.cpp > cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h > > Modified: cfe/trunk/include/clang/Driver/Action.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/include/clang/Driver/Action.h (original) > +++ 
cfe/trunk/include/clang/Driver/Action.h Mon Jul 13 15:21:06 2015 > @@ -41,6 +41,8 @@ public: > enum ActionClass { > InputClass = 0, > BindArchClass, > + CudaDeviceClass, > + CudaHostClass, > PreprocessJobClass, > PrecompileJobClass, > AnalyzeJobClass, > @@ -133,6 +135,41 @@ public: > } > }; > > +class CudaDeviceAction : public Action { > + virtual void anchor(); > + /// GPU architecture to bind -- e.g 'sm_35'. > + const char *GpuArchName; > + /// True when action results are not consumed by the host action (e.g when > + /// -fsyntax-only or --cuda-device-only options are used). > + bool AtTopLevel; > + > +public: > + CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName, > + bool AtTopLevel); > + > + const char *getGpuArchName() const { return GpuArchName; } > + bool isAtTopLevel() const { return AtTopLevel; } > + > + static bool classof(const Action *A) { > + return A->getKind() == CudaDeviceClass; > + } > +}; > + > +class CudaHostAction : public Action { > + virtual void anchor(); > + ActionList DeviceActions; > + > +public: > + CudaHostAction(std::unique_ptr<Action> Input, > + const ActionList &DeviceActions); > + ~CudaHostAction() override; > + > + ActionList &getDeviceActions() { return DeviceActions; } > + const ActionList &getDeviceActions() const { return DeviceActions; } > + > + static bool classof(const Action *A) { return A->getKind() == > CudaHostClass; } > +}; > + > class JobAction : public Action { > virtual void anchor(); > protected: > > Modified: cfe/trunk/include/clang/Driver/Options.td > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/include/clang/Driver/Options.td (original) > +++ cfe/trunk/include/clang/Driver/Options.td Mon Jul 13 15:21:06 2015 > @@ -351,6 +351,12 @@ def cxx_isystem : JoinedOrSeparate<["-"] > MetaVarName<"<directory>">; > def c : 
Flag<["-"], "c">, Flags<[DriverOption]>, > HelpText<"Only run preprocess, compile, and assemble steps">; > +def cuda_device_only : Flag<["--"], "cuda-device-only">, > + HelpText<"Do device-side CUDA compilation only">; > +def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, > + Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU architecture">; > +def cuda_host_only : Flag<["--"], "cuda-host-only">, > + HelpText<"Do host-side CUDA compilation only">; > def dA : Flag<["-"], "dA">, Group<d_Group>; > def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>, > HelpText<"Print macro definitions in -E mode in addition to normal > output">; > > Modified: cfe/trunk/include/clang/Driver/Types.def > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.def?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/include/clang/Driver/Types.def (original) > +++ cfe/trunk/include/clang/Driver/Types.def Mon Jul 13 15:21:06 2015 > @@ -44,6 +44,7 @@ TYPE("c", C, > TYPE("cl", CL, PP_C, "cl", "u") > TYPE("cuda-cpp-output", PP_CUDA, INVALID, "cui", "u") > TYPE("cuda", CUDA, PP_CUDA, "cu", "u") > +TYPE("cuda", CUDA_DEVICE, PP_CUDA, "cu", "") > TYPE("objective-c-cpp-output", PP_ObjC, INVALID, "mi", "u") > TYPE("objc-cpp-output", PP_ObjC_Alias, INVALID, "mi", "u") > TYPE("objective-c", ObjC, PP_ObjC, "m", "u") > > Modified: cfe/trunk/include/clang/Driver/Types.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.h?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/include/clang/Driver/Types.h (original) > +++ cfe/trunk/include/clang/Driver/Types.h Mon Jul 13 15:21:06 2015 > @@ -63,6 +63,9 @@ namespace types { > /// isCXX - Is this a "C++" input (C++ and Obj-C++ sources and headers). > bool isCXX(ID Id); > > + /// isCuda - Is this a CUDA input. 
> + bool isCuda(ID Id); > + > /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and > headers). > bool isObjC(ID Id); > > > Modified: cfe/trunk/lib/Driver/Action.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/lib/Driver/Action.cpp (original) > +++ cfe/trunk/lib/Driver/Action.cpp Mon Jul 13 15:21:06 2015 > @@ -24,6 +24,8 @@ const char *Action::getClassName(ActionC > switch (AC) { > case InputClass: return "input"; > case BindArchClass: return "bind-arch"; > + case CudaDeviceClass: return "cuda-device"; > + case CudaHostClass: return "cuda-host"; > case PreprocessJobClass: return "preprocessor"; > case PrecompileJobClass: return "precompiler"; > case AnalyzeJobClass: return "analyzer"; > @@ -53,6 +55,25 @@ BindArchAction::BindArchAction(std::uniq > const char *_ArchName) > : Action(BindArchClass, std::move(Input)), ArchName(_ArchName) {} > > +void CudaDeviceAction::anchor() {} > + > +CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input, > + const char *ArchName, bool AtTopLevel) > + : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName), > + AtTopLevel(AtTopLevel) {} > + > +void CudaHostAction::anchor() {} > + > +CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input, > + const ActionList &_DeviceActions) > + : Action(CudaHostClass, std::move(Input)), DeviceActions(_DeviceActions) > {} > + > +CudaHostAction::~CudaHostAction() { > + for (iterator it = DeviceActions.begin(), ie = DeviceActions.end(); it != > ie; > + ++it) > + delete *it; > +} > + > void JobAction::anchor() {} > > JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input, > > Modified: cfe/trunk/lib/Driver/Driver.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=242058&r1=242057&r2=242058&view=diff > 
============================================================================== > --- cfe/trunk/lib/Driver/Driver.cpp (original) > +++ cfe/trunk/lib/Driver/Driver.cpp Mon Jul 13 15:21:06 2015 > @@ -174,8 +174,10 @@ phases::ID Driver::getFinalPhase(const D > } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) { > FinalPhase = phases::Backend; > > - // -c only runs up to the assembler. > - } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) { > + // -c and partial CUDA compilations only run up to the assembler. > + } else if ((PhaseArg = DAL.getLastArg(options::OPT_c)) || > + (PhaseArg = DAL.getLastArg(options::OPT_cuda_device_only)) || > + (PhaseArg = DAL.getLastArg(options::OPT_cuda_host_only))) { > FinalPhase = phases::Assemble; > > // Otherwise do everything. > @@ -900,9 +902,20 @@ static unsigned PrintActions1(const Comp > } else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) { > os << '"' << BIA->getArchName() << '"' << ", {" > << PrintActions1(C, *BIA->begin(), Ids) << "}"; > + } else if (CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { > + os << '"' << CDA->getGpuArchName() << '"' << ", {" > + << PrintActions1(C, *CDA->begin(), Ids) << "}"; > } else { > + ActionList *AL; > + if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { > + os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}" > + << ", gpu binaries "; > + AL = &CHA->getDeviceActions(); > + } else > + AL = &A->getInputs(); > + > const char *Prefix = "{"; > - for (Action *PreRequisite : *A) { > + for (Action *PreRequisite : *AL) { > os << Prefix << PrintActions1(C, PreRequisite, Ids); > Prefix = ", "; > } > @@ -1215,6 +1228,93 @@ void Driver::BuildInputs(const ToolChain > } > } > > +// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE input > +// action and then wraps each in CudaDeviceAction paired with appropriate GPU > +// arch name. If we're only building device-side code, each action remains > +// independent. 
Otherwise we pass device-side actions as inputs to a new > +// CudaHostAction which combines both host and device side actions. > +static std::unique_ptr<Action> > +buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args, > + const Arg *InputArg, const types::ID InputType, > + std::unique_ptr<Action> Current, ActionList &Actions) { > + > + assert(InputType == types::TY_CUDA && > + "CUDA Actions only apply to CUDA inputs."); > + > + // Collect all cuda_gpu_arch parameters, removing duplicates. > + SmallVector<const char *, 4> GpuArchList; > + llvm::StringSet<> GpuArchNames; > + for (Arg *A : Args) { > + if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) { > + A->claim(); > + if (GpuArchNames.insert(A->getValue()).second) > + GpuArchList.push_back(A->getValue()); > + } > + } > + > + // Default to sm_20 which is the lowest common denominator for supported > GPUs. > + // sm_20 code should work correctly, if suboptimally, on all newer GPUs. > + if (GpuArchList.empty()) > + GpuArchList.push_back("sm_20"); > + > + // Replicate inputs for each GPU architecture. > + Driver::InputList CudaDeviceInputs; > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > + CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, > InputArg)); > + > + // Build actions for all device inputs. > + ActionList CudaDeviceActions; > + D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions); > + assert(GpuArchList.size() == CudaDeviceActions.size() && > + "Failed to create actions for all devices"); > + > + // Check whether any of device actions stopped before they could generate > PTX. 
> + bool PartialCompilation = false; > + bool DeviceOnlyCompilation = Args.hasArg(options::OPT_cuda_device_only); > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) { > + if (CudaDeviceActions[i]->getKind() != Action::BackendJobClass) { > + PartialCompilation = true; > + break; > + } > + } > + > + // Figure out what to do with device actions -- pass them as inputs to the > + // host action or run each of them independently. > + if (PartialCompilation || DeviceOnlyCompilation) { > + // In case of partial or device-only compilation results of device > actions > + // are not consumed by the host action device actions have to be added to > + // top-level actions list with AtTopLevel=true and run independently. > + > + // -o is ambiguous if we have more than one top-level action. > + if (Args.hasArg(options::OPT_o) && > + (!DeviceOnlyCompilation || GpuArchList.size() > 1)) { > + D.Diag(clang::diag::err_drv_output_argument_with_multiple_files); > + return nullptr; > + } > + > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > + Actions.push_back( > + new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]), > + GpuArchList[i], /* AtTopLevel */ true)); > + // Kill host action in case of device-only compilation. > + if (DeviceOnlyCompilation) > + Current.reset(nullptr); > + return Current; > + } else { > + // Outputs of device actions during complete CUDA compilation get created > + // with AtTopLevel=false and become inputs for the host action. > + ActionList DeviceActions; > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > + DeviceActions.push_back( > + new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]), > + GpuArchList[i], /* AtTopLevel */ false)); > + // Return a new host action that incorporates original host action and > all > + // device actions. 
> + return std::unique_ptr<Action>( > + new CudaHostAction(std::move(Current), DeviceActions)); > + } > +} > + > void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, > const InputList &Inputs, ActionList &Actions) > const { > llvm::PrettyStackTraceString CrashInfo("Building compilation actions"); > @@ -1312,6 +1412,25 @@ void Driver::BuildActions(const ToolChai > continue; > } > > + phases::ID CudaInjectionPhase; > + if (isSaveTempsEnabled()) { > + // All phases are done independently, inject GPU blobs during > compilation > + // phase as that's where we generate glue code to init them. > + CudaInjectionPhase = phases::Compile; > + } else { > + // Assumes that clang does everything up until linking phase, so we > inject > + // cuda device actions at the last step before linking. Otherwise CUDA > + // host action forces preprocessor into a separate invocation. > + if (FinalPhase == phases::Link) { > + for (auto i = PL.begin(), e = PL.end(); i != e; ++i) { > + auto next = i + 1; > + if (next != e && *next == phases::Link) > + CudaInjectionPhase = *i; > + } > + } else > + CudaInjectionPhase = FinalPhase; > + } > + > // Build the pipeline for this file. > std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType)); > for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end(); > @@ -1337,6 +1456,15 @@ void Driver::BuildActions(const ToolChai > > // Otherwise construct the appropriate action. 
> Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current)); > + > + if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase && > + !Args.hasArg(options::OPT_cuda_host_only)) { > + Current = buildCudaActions(*this, TC, Args, InputArg, InputType, > + std::move(Current), Actions); > + if (!Current) > + break; > + } > + > if (Current->getType() == types::TY_Nothing) > break; > } > @@ -1576,7 +1704,13 @@ static const Tool *SelectToolForJob(Comp > if (isa<BackendJobAction>(JA)) { > // Check if the compiler supports emitting LLVM IR. > assert(Inputs->size() == 1); > - JobAction *CompileJA = cast<CompileJobAction>(*Inputs->begin()); > + JobAction *CompileJA; > + // Extract real host action, if it's a CudaHostAction. > + if (CudaHostAction *CudaHA = dyn_cast<CudaHostAction>(*Inputs->begin())) > + CompileJA = cast<CompileJobAction>(*CudaHA->begin()); > + else > + CompileJA = cast<CompileJobAction>(*Inputs->begin()); > + > const Tool *Compiler = TC->SelectTool(*CompileJA); > if (!Compiler) > return nullptr; > @@ -1610,6 +1744,20 @@ void Driver::BuildJobsForAction(Compilat > InputInfo &Result) const { > llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); > > + InputInfoList CudaDeviceInputInfos; > + if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { > + InputInfo II; > + // Append outputs of device jobs to the input list. > + for (const Action *DA : CHA->getDeviceActions()) { > + BuildJobsForAction(C, DA, TC, "", AtTopLevel, > + /*MultipleArchs*/ false, LinkingOutput, II); > + CudaDeviceInputInfos.push_back(II); > + } > + // Override current action with a real host compile action and continue > + // processing it. > + A = *CHA->begin(); > + } > + > if (const InputAction *IA = dyn_cast<InputAction>(A)) { > // FIXME: It would be nice to not claim this here; maybe the old scheme > of > // just using Args was better? 
> @@ -1635,11 +1783,24 @@ void Driver::BuildJobsForAction(Compilat > else > TC = &C.getDefaultToolChain(); > > - BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(), AtTopLevel, > + BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, > MultipleArchs, LinkingOutput, Result); > return; > } > > + if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { > + // Figure out which NVPTX triple to use for device-side compilation > based on > + // whether host is 64-bit. > + llvm::Triple > DeviceTriple(C.getDefaultToolChain().getTriple().isArch64Bit() > + ? "nvptx64-nvidia-cuda" > + : "nvptx-nvidia-cuda"); > + BuildJobsForAction(C, *CDA->begin(), > + &getToolChain(C.getArgs(), DeviceTriple), > + CDA->getGpuArchName(), CDA->isAtTopLevel(), > + /*MultipleArchs*/ true, LinkingOutput, Result); > + return; > + } > + > const ActionList *Inputs = &A->getInputs(); > > const JobAction *JA = cast<JobAction>(A); > @@ -1671,6 +1832,10 @@ void Driver::BuildJobsForAction(Compilat > if (JA->getType() == types::TY_dSYM) > BaseInput = InputInfos[0].getFilename(); > > + // Append outputs of cuda device jobs to the input list > + if (CudaDeviceInputInfos.size()) > + InputInfos.append(CudaDeviceInputInfos.begin(), > CudaDeviceInputInfos.end()); > + > // Determine the place to write output to, if any. > if (JA->getType() == types::TY_Nothing) > Result = InputInfo(A->getType(), BaseInput); > @@ -2052,6 +2217,9 @@ const ToolChain &Driver::getToolChain(co > break; > } > break; > + case llvm::Triple::CUDA: > + TC = new toolchains::CudaToolChain(*this, Target, Args); > + break; > default: > // Of these targets, Hexagon is the only one that might have > // an OS of Linux, in which case it got handled above already. 
> > Modified: cfe/trunk/lib/Driver/ToolChain.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChain.cpp?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/lib/Driver/ToolChain.cpp (original) > +++ cfe/trunk/lib/Driver/ToolChain.cpp Mon Jul 13 15:21:06 2015 > @@ -151,6 +151,8 @@ Tool *ToolChain::getTool(Action::ActionC > > case Action::InputClass: > case Action::BindArchClass: > + case Action::CudaDeviceClass: > + case Action::CudaHostClass: > case Action::LipoJobClass: > case Action::DsymutilJobClass: > case Action::VerifyDebugInfoJobClass: > > Modified: cfe/trunk/lib/Driver/ToolChains.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains.cpp?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/lib/Driver/ToolChains.cpp (original) > +++ cfe/trunk/lib/Driver/ToolChains.cpp Mon Jul 13 15:21:06 2015 > @@ -3652,6 +3652,65 @@ Tool *DragonFly::buildLinker() const { > return new tools::dragonfly::Linker(*this); > } > > +/// Stub for CUDA toolchain. At the moment we don't have assembler or > +/// linker and need toolchain mainly to propagate device-side options > +/// to CC1. 
> + > +CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple, > + const ArgList &Args) > + : Linux(D, Triple, Args) {} > + > +void > +CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, > + llvm::opt::ArgStringList &CC1Args) > const { > + Linux::addClangTargetOptions(DriverArgs, CC1Args); > + CC1Args.push_back("-fcuda-is-device"); > +} > + > +llvm::opt::DerivedArgList * > +CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, > + const char *BoundArch) const { > + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); > + const OptTable &Opts = getDriver().getOpts(); > + > + for (Arg *A : Args) { > + if (A->getOption().matches(options::OPT_Xarch__)) { > + // Skip this argument unless the architecture matches BoundArch > + if (A->getValue(0) != StringRef(BoundArch)) > + continue; > + > + unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1)); > + unsigned Prev = Index; > + std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index)); > + > + // If the argument parsing failed or more than one argument was > + // consumed, the -Xarch_ argument's parameter tried to consume > + // extra arguments. Emit an error and ignore. > + // > + // We also want to disallow any options which would alter the > + // driver behavior; that isn't going to work in our model. We > + // use isDriverOption() as an approximation, although things > + // like -O4 are going to slip through. 
> + if (!XarchArg || Index > Prev + 1) { > + getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args) > + << A->getAsString(Args); > + continue; > + } else if (XarchArg->getOption().hasFlag(options::DriverOption)) { > + getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver) > + << A->getAsString(Args); > + continue; > + } > + XarchArg->setBaseArg(A); > + A = XarchArg.release(); > + DAL->AddSynthesizedArg(A); > + } > + DAL->append(A); > + } > + > + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), > BoundArch); > + return DAL; > +} > + > /// XCore tool chain > XCore::XCore(const Driver &D, const llvm::Triple &Triple, const ArgList > &Args) > : ToolChain(D, Triple, Args) { > > Modified: cfe/trunk/lib/Driver/ToolChains.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains.h?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/lib/Driver/ToolChains.h (original) > +++ cfe/trunk/lib/Driver/ToolChains.h Mon Jul 13 15:21:06 2015 > @@ -699,6 +699,18 @@ private: > std::string computeSysRoot() const; > }; > > +class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux { > +public: > + CudaToolChain(const Driver &D, const llvm::Triple &Triple, > + const llvm::opt::ArgList &Args); > + > + llvm::opt::DerivedArgList * > + TranslateArgs(const llvm::opt::DerivedArgList &Args, > + const char *BoundArch) const override; > + void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, > + llvm::opt::ArgStringList &CC1Args) const > override; > +}; > + > class LLVM_LIBRARY_VISIBILITY Hexagon_TC : public Linux { > protected: > GCCVersion GCCLibAndIncVersion; > > Modified: cfe/trunk/lib/Driver/Tools.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/lib/Driver/Tools.cpp (original) 
> +++ cfe/trunk/lib/Driver/Tools.cpp Mon Jul 13 15:21:06 2015 > @@ -1488,6 +1488,12 @@ static std::string getCPUName(const ArgL > return CPUName; > } > > + case llvm::Triple::nvptx: > + case llvm::Triple::nvptx64: > + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) > + return A->getValue(); > + return ""; > + > case llvm::Triple::ppc: > case llvm::Triple::ppc64: > case llvm::Triple::ppc64le: { > @@ -2826,8 +2832,14 @@ void Clang::ConstructJob(Compilation &C, > getToolChain().getTriple().isWindowsCygwinEnvironment(); > bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); > > - assert(Inputs.size() == 1 && "Unable to handle multiple inputs."); > + // Check number of inputs for sanity. We need at least one input. > + assert(Inputs.size() >= 1 && "Must have at least one input."); > const InputInfo &Input = Inputs[0]; > + // CUDA compilation may have multiple inputs (source file + results of > + // device-side compilations). All other jobs are expected to have exactly > one > + // input. > + bool IsCuda = types::isCuda(Input.getType()); > + assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple > inputs."); > > // Invoke ourselves in -cc1 mode. > // > @@ -4802,14 +4814,12 @@ void Clang::ConstructJob(Compilation &C, > assert(Output.isNothing() && "Invalid output."); > } > > - for (const auto &II : Inputs) { > - addDashXForInput(Args, II, CmdArgs); > + addDashXForInput(Args, Input, CmdArgs); > > - if (II.isFilename()) > - CmdArgs.push_back(II.getFilename()); > - else > - II.getInputArg().renderAsInput(Args, CmdArgs); > - } > + if (Input.isFilename()) > + CmdArgs.push_back(Input.getFilename()); > + else > + Input.getInputArg().renderAsInput(Args, CmdArgs); > > Args.AddAllArgs(CmdArgs, options::OPT_undef); > > @@ -4847,6 +4857,16 @@ void Clang::ConstructJob(Compilation &C, > CmdArgs.push_back(SplitDwarfOut); > } > > + // Host-side cuda compilation receives device-side outputs as Inputs[1...]. 
> + // Include them with -fcuda-include-gpubinary. > + if (IsCuda && Inputs.size() > 1) > + for (InputInfoList::const_iterator it = std::next(Inputs.begin()), > + ie = Inputs.end(); > + it != ie; ++it) { > + CmdArgs.push_back("-fcuda-include-gpubinary"); > + CmdArgs.push_back(it->getFilename()); > + } > + > // Finally add the compile command to the compilation. > if (Args.hasArg(options::OPT__SLASH_fallback) && > Output.getType() == types::TY_Object && > > Modified: cfe/trunk/lib/Driver/Types.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Types.cpp?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/lib/Driver/Types.cpp (original) > +++ cfe/trunk/lib/Driver/Types.cpp Mon Jul 13 15:21:06 2015 > @@ -86,6 +86,7 @@ bool types::isAcceptedByClang(ID Id) { > case TY_C: case TY_PP_C: > case TY_CL: > case TY_CUDA: case TY_PP_CUDA: > + case TY_CUDA_DEVICE: > case TY_ObjC: case TY_PP_ObjC: case TY_PP_ObjC_Alias: > case TY_CXX: case TY_PP_CXX: > case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias: > @@ -122,7 +123,19 @@ bool types::isCXX(ID Id) { > case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias: > case TY_CXXHeader: case TY_PP_CXXHeader: > case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: > - case TY_CUDA: case TY_PP_CUDA: > + case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: > + return true; > + } > +} > + > +bool types::isCuda(ID Id) { > + switch (Id) { > + default: > + return false; > + > + case TY_CUDA: > + case TY_PP_CUDA: > + case TY_CUDA_DEVICE: > return true; > } > } > @@ -206,10 +219,12 @@ void types::getCompilationPhases(ID Id, > P.push_back(phases::Compile); > P.push_back(phases::Backend); > } > - P.push_back(phases::Assemble); > + if (Id != TY_CUDA_DEVICE) > + P.push_back(phases::Assemble); > } > } > - if (!onlyPrecompileType(Id)) { > + > + if (!onlyPrecompileType(Id) && Id != TY_CUDA_DEVICE) { > P.push_back(phases::Link); > } > assert(0 < 
P.size() && "Not enough phases in list"); > > Modified: cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp (original) > +++ cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp Mon Jul 13 > 15:21:06 2015 > @@ -15,6 +15,7 @@ > #include "clang/Basic/DiagnosticOptions.h" > #include "clang/Driver/Compilation.h" > #include "clang/Driver/Driver.h" > +#include "clang/Driver/Action.h" > #include "clang/Driver/Options.h" > #include "clang/Driver/Tool.h" > #include "clang/Frontend/CompilerInstance.h" > @@ -61,9 +62,21 @@ clang::createInvocationFromCommandLine(A > } > > // We expect to get back exactly one command job, if we didn't something > - // failed. > + // failed. CUDA compilation is an exception as it creates multiple jobs. If > + // that's the case, we proceed with the first job. If caller needs > particular > + // CUDA job, it should be controlled via --cuda-{host|device}-only option > + // passed to the driver. 
> const driver::JobList &Jobs = C->getJobs(); > - if (Jobs.size() != 1 || !isa<driver::Command>(*Jobs.begin())) { > + bool CudaCompilation = false; > + if (Jobs.size() > 1) { > + for (auto &A : C->getActions()) > + if (isa<driver::CudaDeviceAction>(A)) { > + CudaCompilation = true; > + break; > + } > + } > + if (Jobs.size() == 0 || !isa<driver::Command>(*Jobs.begin()) || > + (Jobs.size() > 1 && !CudaCompilation)) { > SmallString<256> Msg; > llvm::raw_svector_ostream OS(Msg); > Jobs.Print(OS, "; ", true); > > Added: cfe/trunk/test/Driver/cuda-options.cu > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-options.cu?rev=242058&view=auto > ============================================================================== > --- cfe/trunk/test/Driver/cuda-options.cu (added) > +++ cfe/trunk/test/Driver/cuda-options.cu Mon Jul 13 15:21:06 2015 > @@ -0,0 +1,108 @@ > +// Tests CUDA compilation pipeline construction in Driver. > + > +// Simple compilation case: > +// RUN: %clang -### -c %s 2>&1 \ > +// Compile device-side to PTX assembly and make sure we use it on the host > side. > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > +// Then compile host side and incorporate device code. > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > +// Make sure we don't link anything. > +// RUN: -check-prefix CUDA-NL %s > + > +// Typical compilation + link case: > +// RUN: %clang -### %s 2>&1 \ > +// Compile device-side to PTX assembly and make sure we use it on the host > side > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > +// Then compile host side and incorporate device code. > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > +// Then link things. > +// RUN: -check-prefix CUDA-L %s > + > +// Verify that -cuda-no-device disables device-side compilation and linking > +// RUN: %clang -### --cuda-host-only %s 2>&1 \ > +// Make sure we didn't run device-side compilation. 
> +// RUN: | FileCheck -check-prefix CUDA-ND \ > +// Then compile host side and make sure we don't attempt to incorporate GPU > code. > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-NI \ > +// Make sure we don't link anything. > +// RUN: -check-prefix CUDA-NL %s > + > +// Verify that -cuda-no-host disables host-side compilation and linking > +// RUN: %clang -### --cuda-device-only %s 2>&1 \ > +// Compile device-side to PTX assembly > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > +// Make sure there are no host cmpilation or linking. > +// RUN: -check-prefix CUDA-NH -check-prefix CUDA-NL %s > + > +// Verify that with -S we compile host and device sides to assembly > +// and incorporate device code on the host side. > +// RUN: %clang -### -S -c %s 2>&1 \ > +// Compile device-side to PTX assembly > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > +// Then compile host side and incorporate GPU code. > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > +// Make sure we don't link anything. > +// RUN: -check-prefix CUDA-NL %s > + > +// Verify that --cuda-gpu-arch option passes correct GPU > +// archtecture info to device compilation. > +// RUN: %clang -### --cuda-gpu-arch=sm_35 -c %s 2>&1 \ > +// Compile device-side to PTX assembly. > +// RUN: | FileCheck -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \ > +// Then compile host side and incorporate GPU code. > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > +// Make sure we don't link anything. > +// RUN: -check-prefix CUDA-NL %s > + > +// Verify that there is device-side compilation per --cuda-gpu-arch args > +// and that all results are included on the host side. 
> +// RUN: %clang -### --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \ > +// Compile both device-sides to PTX assembly > +// RUN: | FileCheck \ > +// RUN: -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \ > +// RUN: -check-prefix CUDA-D2 -check-prefix CUDA-D2-SM30 \ > +// Then compile host side and incorporate both device-side outputs > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 -check-prefix > CUDA-H-I2 \ > +// Make sure we don't link anything. > +// RUN: -check-prefix CUDA-NL %s > + > +// Match device-side compilation > +// CUDA-D1: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > +// CUDA-D1-SAME: "-fcuda-is-device" > +// CUDA-D1-SM35-SAME: "-target-cpu" "sm_35" > +// CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]" > +// CUDA-D1-SAME: "-x" "cuda" > + > +// Match another device-side compilation > +// CUDA-D2: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > +// CUDA-D2-SAME: "-fcuda-is-device" > +// CUDA-D2-SM30-SAME: "-target-cpu" "sm_30" > +// CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]" > +// CUDA-D2-SAME: "-x" "cuda" > + > +// Match no device-side compilation > +// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > +// CUDA-ND-SAME-NOT: "-fcuda-is-device" > + > +// Match host-side compilation > +// CUDA-H: "-cc1" "-triple" > +// CUDA-H-SAME-NOT: "nvptx{{64?}}-nvidia-cuda" > +// CUDA-H-SAME-NOT: "-fcuda-is-device" > +// CUDA-H-SAME: "-o" "[[HOSTOBJ:[^"]*]]" > +// CUDA-H-SAME: "-x" "cuda" > +// CUDA-H-I1-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY1]]" > +// CUDA-H-I2-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY2]]" > + > +// Match no GPU code inclusion. 
> +// CUDA-H-NI-NOT: "-fcuda-include-gpubinary" > + > +// Match no CUDA compilation > +// CUDA-NH-NOT: "-cc1" "-triple" > +// CUDA-NH-SAME-NOT: "-x" "cuda" > + > +// Match linker > +// CUDA-L: "{{.*}}ld{{(.exe)?}}" > +// CUDA-L-SAME: "[[HOSTOBJ]]" > + > +// Match no linker > +// CUDA-NL-NOT: "{{.*}}ld{{(.exe)?}}" > > Modified: cfe/trunk/test/Index/attributes-cuda.cu > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/attributes-cuda.cu?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/test/Index/attributes-cuda.cu (original) > +++ cfe/trunk/test/Index/attributes-cuda.cu Mon Jul 13 15:21:06 2015 > @@ -1,4 +1,6 @@ > // RUN: c-index-test -test-load-source all -x cuda %s | FileCheck %s > +// RUN: c-index-test -test-load-source all -x cuda --cuda-host-only %s | > FileCheck %s > +// RUN: c-index-test -test-load-source all -x cuda --cuda-device-only %s | > FileCheck %s > > __attribute__((device)) void f_device(); > __attribute__((global)) void f_global(); > @@ -6,13 +8,13 @@ __attribute__((constant)) int* g_constan > __attribute__((shared)) float *g_shared; > __attribute__((host)) void f_host(); > > -// CHECK: attributes-cuda.cu:3:30: FunctionDecl=f_device:3:30 > -// CHECK-NEXT: attributes-cuda.cu:3:16: attribute(device) > -// CHECK: attributes-cuda.cu:4:30: FunctionDecl=f_global:4:30 > -// CHECK-NEXT: attributes-cuda.cu:4:16: attribute(global) > -// CHECK: attributes-cuda.cu:5:32: VarDecl=g_constant:5:32 (Definition) > -// CHECK-NEXT: attributes-cuda.cu:5:16: attribute(constant) > -// CHECK: attributes-cuda.cu:6:32: VarDecl=g_shared:6:32 (Definition) > -// CHECK-NEXT: attributes-cuda.cu:6:16: attribute(shared) > -// CHECK: attributes-cuda.cu:7:28: FunctionDecl=f_host:7:28 > -// CHECK-NEXT: attributes-cuda.cu:7:16: attribute(host) > +// CHECK: attributes-cuda.cu:5:30: FunctionDecl=f_device:5:30 > +// CHECK-NEXT: attributes-cuda.cu:5:16: attribute(device) > +// CHECK: 
attributes-cuda.cu:6:30: FunctionDecl=f_global:6:30 > +// CHECK-NEXT: attributes-cuda.cu:6:16: attribute(global) > +// CHECK: attributes-cuda.cu:7:32: VarDecl=g_constant:7:32 (Definition) > +// CHECK-NEXT: attributes-cuda.cu:7:16: attribute(constant) > +// CHECK: attributes-cuda.cu:8:32: VarDecl=g_shared:8:32 (Definition) > +// CHECK-NEXT: attributes-cuda.cu:8:16: attribute(shared) > +// CHECK: attributes-cuda.cu:9:28: FunctionDecl=f_host:9:28 > +// CHECK-NEXT: attributes-cuda.cu:9:16: attribute(host) > > Added: cfe/trunk/test/Index/index-file.cu > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/index-file.cu?rev=242058&view=auto > ============================================================================== > --- cfe/trunk/test/Index/index-file.cu (added) > +++ cfe/trunk/test/Index/index-file.cu Mon Jul 13 15:21:06 2015 > @@ -0,0 +1,9 @@ > +// Make sure we can process CUDA file even if driver creates multiple jobs > +// RUN: c-index-test -test-load-source all %s | FileCheck %s > -check-prefix=CHECK-ANY > +// Make sure we process correct side of cuda compilation > +// RUN: c-index-test -test-load-source all --cuda-host-only %s | FileCheck > %s -check-prefix=CHECK-HOST > +// RUN: c-index-test -test-load-source all --cuda-device-only %s | FileCheck > %s -check-prefix=CHECK-DEVICE > + > +// CHECK-ANY: macro definition=__cplusplus > +// CHECK-HOST-NOT: macro definition=__CUDA_ARCH__ > +// CHECK-DEVICE: macro definition=__CUDA_ARCH__ > > Modified: cfe/trunk/tools/libclang/CIndex.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/libclang/CIndex.cpp?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/tools/libclang/CIndex.cpp (original) > +++ cfe/trunk/tools/libclang/CIndex.cpp Mon Jul 13 15:21:06 2015 > @@ -3102,6 +3102,12 @@ static void clang_parseTranslationUnit_I > /*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies, > 
/*UserFilesAreVolatile=*/true, ForSerialization, &ErrUnit)); > > + // Early failures in LoadFromCommandLine may return with ErrUnit unset. > + if (!Unit && !ErrUnit) { > + PTUI->result = CXError_ASTReadError; > + return; > + } > + > if (NumErrors != Diags->getClient()->getNumErrors()) { > // Make sure to check that 'Unit' is non-NULL. > if (CXXIdx->getDisplayDiagnostics()) > > Modified: cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h?rev=242058&r1=242057&r2=242058&view=diff > ============================================================================== > --- cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h (original) > +++ cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h Mon Jul 13 15:21:06 2015 > @@ -164,6 +164,7 @@ testing::AssertionResult matchesConditio > std::vector<std::string> Args; > Args.push_back("-xcuda"); > Args.push_back("-fno-ms-extensions"); > + Args.push_back("--cuda-host-only"); > Args.push_back(CompileArg); > if (!runToolOnCodeWithArgs(Factory->create(), > CudaHeader + Code, Args)) { > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list cfe-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits