OK. Thanks. --Artem
On Mon, Jul 13, 2015 at 3:27 PM, Rafael EspĂndola < rafael.espind...@gmail.com> wrote: > I reverted this as it was failing on OS X: > > > http://lab.llvm.org:8080/green/job/clang-stage1-configure-RA_check/9068/console > > On 13 July 2015 at 13:21, Artem Belevich <t...@google.com> wrote: > > Author: tra > > Date: Mon Jul 13 15:21:06 2015 > > New Revision: 242058 > > > > URL: http://llvm.org/viewvc/llvm-project?rev=242058&view=rev > > Log: > > [cuda] Driver changes to compile and stitch together host and > device-side CUDA code. > > > > - Changed driver pipeline to compile host and device side of CUDA > > files and incorporate results of device-side compilation into host > > object file. > > > > - Added a test for cuda pipeline creation in clang driver. > > > > New clang options: > > --cuda-host-only - Do host-side compilation only. > > --cuda-device-only - Do device-side compilation only. > > > > --cuda-gpu-arch=<ARCH> - specify GPU architecture for device-side > > compilation. E.g. sm_35, sm_30. Default is sm_20. May be used more > > than once in which case one device-compilation will be done per > > unique specified GPU architecture. 
> > > > Differential Revision: http://reviews.llvm.org/D9509 > > > > Added: > > cfe/trunk/test/Driver/cuda-options.cu > > cfe/trunk/test/Index/index-file.cu > > Modified: > > cfe/trunk/include/clang/Driver/Action.h > > cfe/trunk/include/clang/Driver/Options.td > > cfe/trunk/include/clang/Driver/Types.def > > cfe/trunk/include/clang/Driver/Types.h > > cfe/trunk/lib/Driver/Action.cpp > > cfe/trunk/lib/Driver/Driver.cpp > > cfe/trunk/lib/Driver/ToolChain.cpp > > cfe/trunk/lib/Driver/ToolChains.cpp > > cfe/trunk/lib/Driver/ToolChains.h > > cfe/trunk/lib/Driver/Tools.cpp > > cfe/trunk/lib/Driver/Types.cpp > > cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp > > cfe/trunk/test/Index/attributes-cuda.cu > > cfe/trunk/tools/libclang/CIndex.cpp > > cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h > > > > Modified: cfe/trunk/include/clang/Driver/Action.h > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/include/clang/Driver/Action.h (original) > > +++ cfe/trunk/include/clang/Driver/Action.h Mon Jul 13 15:21:06 2015 > > @@ -41,6 +41,8 @@ public: > > enum ActionClass { > > InputClass = 0, > > BindArchClass, > > + CudaDeviceClass, > > + CudaHostClass, > > PreprocessJobClass, > > PrecompileJobClass, > > AnalyzeJobClass, > > @@ -133,6 +135,41 @@ public: > > } > > }; > > > > +class CudaDeviceAction : public Action { > > + virtual void anchor(); > > + /// GPU architecture to bind -- e.g 'sm_35'. > > + const char *GpuArchName; > > + /// True when action results are not consumed by the host action (e.g > when > > + /// -fsyntax-only or --cuda-device-only options are used). 
> > + bool AtTopLevel; > > + > > +public: > > + CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName, > > + bool AtTopLevel); > > + > > + const char *getGpuArchName() const { return GpuArchName; } > > + bool isAtTopLevel() const { return AtTopLevel; } > > + > > + static bool classof(const Action *A) { > > + return A->getKind() == CudaDeviceClass; > > + } > > +}; > > + > > +class CudaHostAction : public Action { > > + virtual void anchor(); > > + ActionList DeviceActions; > > + > > +public: > > + CudaHostAction(std::unique_ptr<Action> Input, > > + const ActionList &DeviceActions); > > + ~CudaHostAction() override; > > + > > + ActionList &getDeviceActions() { return DeviceActions; } > > + const ActionList &getDeviceActions() const { return DeviceActions; } > > + > > + static bool classof(const Action *A) { return A->getKind() == > CudaHostClass; } > > +}; > > + > > class JobAction : public Action { > > virtual void anchor(); > > protected: > > > > Modified: cfe/trunk/include/clang/Driver/Options.td > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/include/clang/Driver/Options.td (original) > > +++ cfe/trunk/include/clang/Driver/Options.td Mon Jul 13 15:21:06 2015 > > @@ -351,6 +351,12 @@ def cxx_isystem : JoinedOrSeparate<["-"] > > MetaVarName<"<directory>">; > > def c : Flag<["-"], "c">, Flags<[DriverOption]>, > > HelpText<"Only run preprocess, compile, and assemble steps">; > > +def cuda_device_only : Flag<["--"], "cuda-device-only">, > > + HelpText<"Do device-side CUDA compilation only">; > > +def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, > > + Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU architecture">; > > +def cuda_host_only : Flag<["--"], "cuda-host-only">, > > + HelpText<"Do host-side CUDA compilation only">; > > def dA : Flag<["-"], "dA">, 
Group<d_Group>; > > def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>, > > HelpText<"Print macro definitions in -E mode in addition to normal > output">; > > > > Modified: cfe/trunk/include/clang/Driver/Types.def > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.def?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/include/clang/Driver/Types.def (original) > > +++ cfe/trunk/include/clang/Driver/Types.def Mon Jul 13 15:21:06 2015 > > @@ -44,6 +44,7 @@ TYPE("c", C, > > TYPE("cl", CL, PP_C, "cl", > "u") > > TYPE("cuda-cpp-output", PP_CUDA, INVALID, "cui", > "u") > > TYPE("cuda", CUDA, PP_CUDA, "cu", > "u") > > +TYPE("cuda", CUDA_DEVICE, PP_CUDA, "cu", > "") > > TYPE("objective-c-cpp-output", PP_ObjC, INVALID, "mi", > "u") > > TYPE("objc-cpp-output", PP_ObjC_Alias, INVALID, "mi", > "u") > > TYPE("objective-c", ObjC, PP_ObjC, "m", > "u") > > > > Modified: cfe/trunk/include/clang/Driver/Types.h > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.h?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/include/clang/Driver/Types.h (original) > > +++ cfe/trunk/include/clang/Driver/Types.h Mon Jul 13 15:21:06 2015 > > @@ -63,6 +63,9 @@ namespace types { > > /// isCXX - Is this a "C++" input (C++ and Obj-C++ sources and > headers). > > bool isCXX(ID Id); > > > > + /// isCuda - Is this a CUDA input. > > + bool isCuda(ID Id); > > + > > /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and > headers). 
> > bool isObjC(ID Id); > > > > > > Modified: cfe/trunk/lib/Driver/Action.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/Action.cpp (original) > > +++ cfe/trunk/lib/Driver/Action.cpp Mon Jul 13 15:21:06 2015 > > @@ -24,6 +24,8 @@ const char *Action::getClassName(ActionC > > switch (AC) { > > case InputClass: return "input"; > > case BindArchClass: return "bind-arch"; > > + case CudaDeviceClass: return "cuda-device"; > > + case CudaHostClass: return "cuda-host"; > > case PreprocessJobClass: return "preprocessor"; > > case PrecompileJobClass: return "precompiler"; > > case AnalyzeJobClass: return "analyzer"; > > @@ -53,6 +55,25 @@ BindArchAction::BindArchAction(std::uniq > > const char *_ArchName) > > : Action(BindArchClass, std::move(Input)), ArchName(_ArchName) {} > > > > +void CudaDeviceAction::anchor() {} > > + > > +CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input, > > + const char *ArchName, bool > AtTopLevel) > > + : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName), > > + AtTopLevel(AtTopLevel) {} > > + > > +void CudaHostAction::anchor() {} > > + > > +CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input, > > + const ActionList &_DeviceActions) > > + : Action(CudaHostClass, std::move(Input)), > DeviceActions(_DeviceActions) {} > > + > > +CudaHostAction::~CudaHostAction() { > > + for (iterator it = DeviceActions.begin(), ie = DeviceActions.end(); > it != ie; > > + ++it) > > + delete *it; > > +} > > + > > void JobAction::anchor() {} > > > > JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input, > > > > Modified: cfe/trunk/lib/Driver/Driver.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > 
============================================================================== > > --- cfe/trunk/lib/Driver/Driver.cpp (original) > > +++ cfe/trunk/lib/Driver/Driver.cpp Mon Jul 13 15:21:06 2015 > > @@ -174,8 +174,10 @@ phases::ID Driver::getFinalPhase(const D > > } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) { > > FinalPhase = phases::Backend; > > > > - // -c only runs up to the assembler. > > - } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) { > > + // -c and partial CUDA compilations only run up to the assembler. > > + } else if ((PhaseArg = DAL.getLastArg(options::OPT_c)) || > > + (PhaseArg = DAL.getLastArg(options::OPT_cuda_device_only)) > || > > + (PhaseArg = DAL.getLastArg(options::OPT_cuda_host_only))) { > > FinalPhase = phases::Assemble; > > > > // Otherwise do everything. > > @@ -900,9 +902,20 @@ static unsigned PrintActions1(const Comp > > } else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) { > > os << '"' << BIA->getArchName() << '"' << ", {" > > << PrintActions1(C, *BIA->begin(), Ids) << "}"; > > + } else if (CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { > > + os << '"' << CDA->getGpuArchName() << '"' << ", {" > > + << PrintActions1(C, *CDA->begin(), Ids) << "}"; > > } else { > > + ActionList *AL; > > + if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { > > + os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}" > > + << ", gpu binaries "; > > + AL = &CHA->getDeviceActions(); > > + } else > > + AL = &A->getInputs(); > > + > > const char *Prefix = "{"; > > - for (Action *PreRequisite : *A) { > > + for (Action *PreRequisite : *AL) { > > os << Prefix << PrintActions1(C, PreRequisite, Ids); > > Prefix = ", "; > > } > > @@ -1215,6 +1228,93 @@ void Driver::BuildInputs(const ToolChain > > } > > } > > > > +// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE > input > > +// action and then wraps each in CudaDeviceAction paired with > appropriate GPU > > +// arch name. 
If we're only building device-side code, each action > remains > > +// independent. Otherwise we pass device-side actions as inputs to a new > > +// CudaHostAction which combines both host and device side actions. > > +static std::unique_ptr<Action> > > +buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList > &Args, > > + const Arg *InputArg, const types::ID InputType, > > + std::unique_ptr<Action> Current, ActionList &Actions) { > > + > > + assert(InputType == types::TY_CUDA && > > + "CUDA Actions only apply to CUDA inputs."); > > + > > + // Collect all cuda_gpu_arch parameters, removing duplicates. > > + SmallVector<const char *, 4> GpuArchList; > > + llvm::StringSet<> GpuArchNames; > > + for (Arg *A : Args) { > > + if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) { > > + A->claim(); > > + if (GpuArchNames.insert(A->getValue()).second) > > + GpuArchList.push_back(A->getValue()); > > + } > > + } > > + > > + // Default to sm_20 which is the lowest common denominator for > supported GPUs. > > + // sm_20 code should work correctly, if suboptimally, on all newer > GPUs. > > + if (GpuArchList.empty()) > > + GpuArchList.push_back("sm_20"); > > + > > + // Replicate inputs for each GPU architecture. > > + Driver::InputList CudaDeviceInputs; > > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > > + CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, > InputArg)); > > + > > + // Build actions for all device inputs. > > + ActionList CudaDeviceActions; > > + D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions); > > + assert(GpuArchList.size() == CudaDeviceActions.size() && > > + "Failed to create actions for all devices"); > > + > > + // Check whether any of device actions stopped before they could > generate PTX. 
> > + bool PartialCompilation = false; > > + bool DeviceOnlyCompilation = > Args.hasArg(options::OPT_cuda_device_only); > > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) { > > + if (CudaDeviceActions[i]->getKind() != Action::BackendJobClass) { > > + PartialCompilation = true; > > + break; > > + } > > + } > > + > > + // Figure out what to do with device actions -- pass them as inputs > to the > > + // host action or run each of them independently. > > + if (PartialCompilation || DeviceOnlyCompilation) { > > + // In case of partial or device-only compilation results of device > actions > > + // are not consumed by the host action device actions have to be > added to > > + // top-level actions list with AtTopLevel=true and run > independently. > > + > > + // -o is ambiguous if we have more than one top-level action. > > + if (Args.hasArg(options::OPT_o) && > > + (!DeviceOnlyCompilation || GpuArchList.size() > 1)) { > > + D.Diag(clang::diag::err_drv_output_argument_with_multiple_files); > > + return nullptr; > > + } > > + > > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > > + Actions.push_back( > > + new > CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]), > > + GpuArchList[i], /* AtTopLevel */ true)); > > + // Kill host action in case of device-only compilation. > > + if (DeviceOnlyCompilation) > > + Current.reset(nullptr); > > + return Current; > > + } else { > > + // Outputs of device actions during complete CUDA compilation get > created > > + // with AtTopLevel=false and become inputs for the host action. > > + ActionList DeviceActions; > > + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) > > + DeviceActions.push_back( > > + new > CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]), > > + GpuArchList[i], /* AtTopLevel */ false)); > > + // Return a new host action that incorporates original host action > and all > > + // device actions. 
> > + return std::unique_ptr<Action>( > > + new CudaHostAction(std::move(Current), DeviceActions)); > > + } > > +} > > + > > void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, > > const InputList &Inputs, ActionList &Actions) > const { > > llvm::PrettyStackTraceString CrashInfo("Building compilation > actions"); > > @@ -1312,6 +1412,25 @@ void Driver::BuildActions(const ToolChai > > continue; > > } > > > > + phases::ID CudaInjectionPhase; > > + if (isSaveTempsEnabled()) { > > + // All phases are done independently, inject GPU blobs during > compilation > > + // phase as that's where we generate glue code to init them. > > + CudaInjectionPhase = phases::Compile; > > + } else { > > + // Assumes that clang does everything up until linking phase, so > we inject > > + // cuda device actions at the last step before linking. Otherwise > CUDA > > + // host action forces preprocessor into a separate invocation. > > + if (FinalPhase == phases::Link) { > > + for (auto i = PL.begin(), e = PL.end(); i != e; ++i) { > > + auto next = i + 1; > > + if (next != e && *next == phases::Link) > > + CudaInjectionPhase = *i; > > + } > > + } else > > + CudaInjectionPhase = FinalPhase; > > + } > > + > > // Build the pipeline for this file. > > std::unique_ptr<Action> Current(new InputAction(*InputArg, > InputType)); > > for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = > PL.end(); > > @@ -1337,6 +1456,15 @@ void Driver::BuildActions(const ToolChai > > > > // Otherwise construct the appropriate action. 
> > Current = ConstructPhaseAction(TC, Args, Phase, > std::move(Current)); > > + > > + if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase && > > + !Args.hasArg(options::OPT_cuda_host_only)) { > > + Current = buildCudaActions(*this, TC, Args, InputArg, InputType, > > + std::move(Current), Actions); > > + if (!Current) > > + break; > > + } > > + > > if (Current->getType() == types::TY_Nothing) > > break; > > } > > @@ -1576,7 +1704,13 @@ static const Tool *SelectToolForJob(Comp > > if (isa<BackendJobAction>(JA)) { > > // Check if the compiler supports emitting LLVM IR. > > assert(Inputs->size() == 1); > > - JobAction *CompileJA = cast<CompileJobAction>(*Inputs->begin()); > > + JobAction *CompileJA; > > + // Extract real host action, if it's a CudaHostAction. > > + if (CudaHostAction *CudaHA = > dyn_cast<CudaHostAction>(*Inputs->begin())) > > + CompileJA = cast<CompileJobAction>(*CudaHA->begin()); > > + else > > + CompileJA = cast<CompileJobAction>(*Inputs->begin()); > > + > > const Tool *Compiler = TC->SelectTool(*CompileJA); > > if (!Compiler) > > return nullptr; > > @@ -1610,6 +1744,20 @@ void Driver::BuildJobsForAction(Compilat > > InputInfo &Result) const { > > llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); > > > > + InputInfoList CudaDeviceInputInfos; > > + if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { > > + InputInfo II; > > + // Append outputs of device jobs to the input list. > > + for (const Action *DA : CHA->getDeviceActions()) { > > + BuildJobsForAction(C, DA, TC, "", AtTopLevel, > > + /*MultipleArchs*/ false, LinkingOutput, II); > > + CudaDeviceInputInfos.push_back(II); > > + } > > + // Override current action with a real host compile action and > continue > > + // processing it. > > + A = *CHA->begin(); > > + } > > + > > if (const InputAction *IA = dyn_cast<InputAction>(A)) { > > // FIXME: It would be nice to not claim this here; maybe the old > scheme of > > // just using Args was better? 
> > @@ -1635,11 +1783,24 @@ void Driver::BuildJobsForAction(Compilat > > else > > TC = &C.getDefaultToolChain(); > > > > - BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(), > AtTopLevel, > > + BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, > > MultipleArchs, LinkingOutput, Result); > > return; > > } > > > > + if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { > > + // Figure out which NVPTX triple to use for device-side compilation > based on > > + // whether host is 64-bit. > > + llvm::Triple > DeviceTriple(C.getDefaultToolChain().getTriple().isArch64Bit() > > + ? "nvptx64-nvidia-cuda" > > + : "nvptx-nvidia-cuda"); > > + BuildJobsForAction(C, *CDA->begin(), > > + &getToolChain(C.getArgs(), DeviceTriple), > > + CDA->getGpuArchName(), CDA->isAtTopLevel(), > > + /*MultipleArchs*/ true, LinkingOutput, Result); > > + return; > > + } > > + > > const ActionList *Inputs = &A->getInputs(); > > > > const JobAction *JA = cast<JobAction>(A); > > @@ -1671,6 +1832,10 @@ void Driver::BuildJobsForAction(Compilat > > if (JA->getType() == types::TY_dSYM) > > BaseInput = InputInfos[0].getFilename(); > > > > + // Append outputs of cuda device jobs to the input list > > + if (CudaDeviceInputInfos.size()) > > + InputInfos.append(CudaDeviceInputInfos.begin(), > CudaDeviceInputInfos.end()); > > + > > // Determine the place to write output to, if any. > > if (JA->getType() == types::TY_Nothing) > > Result = InputInfo(A->getType(), BaseInput); > > @@ -2052,6 +2217,9 @@ const ToolChain &Driver::getToolChain(co > > break; > > } > > break; > > + case llvm::Triple::CUDA: > > + TC = new toolchains::CudaToolChain(*this, Target, Args); > > + break; > > default: > > // Of these targets, Hexagon is the only one that might have > > // an OS of Linux, in which case it got handled above already. 
> > > > Modified: cfe/trunk/lib/Driver/ToolChain.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChain.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/ToolChain.cpp (original) > > +++ cfe/trunk/lib/Driver/ToolChain.cpp Mon Jul 13 15:21:06 2015 > > @@ -151,6 +151,8 @@ Tool *ToolChain::getTool(Action::ActionC > > > > case Action::InputClass: > > case Action::BindArchClass: > > + case Action::CudaDeviceClass: > > + case Action::CudaHostClass: > > case Action::LipoJobClass: > > case Action::DsymutilJobClass: > > case Action::VerifyDebugInfoJobClass: > > > > Modified: cfe/trunk/lib/Driver/ToolChains.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/ToolChains.cpp (original) > > +++ cfe/trunk/lib/Driver/ToolChains.cpp Mon Jul 13 15:21:06 2015 > > @@ -3652,6 +3652,65 @@ Tool *DragonFly::buildLinker() const { > > return new tools::dragonfly::Linker(*this); > > } > > > > +/// Stub for CUDA toolchain. At the moment we don't have assembler or > > +/// linker and need toolchain mainly to propagate device-side options > > +/// to CC1. 
> > + > > +CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple > &Triple, > > + const ArgList &Args) > > + : Linux(D, Triple, Args) {} > > + > > +void > > +CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList > &DriverArgs, > > + llvm::opt::ArgStringList &CC1Args) > const { > > + Linux::addClangTargetOptions(DriverArgs, CC1Args); > > + CC1Args.push_back("-fcuda-is-device"); > > +} > > + > > +llvm::opt::DerivedArgList * > > +CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, > > + const char *BoundArch) const { > > + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); > > + const OptTable &Opts = getDriver().getOpts(); > > + > > + for (Arg *A : Args) { > > + if (A->getOption().matches(options::OPT_Xarch__)) { > > + // Skip this argument unless the architecture matches BoundArch > > + if (A->getValue(0) != StringRef(BoundArch)) > > + continue; > > + > > + unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1)); > > + unsigned Prev = Index; > > + std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index)); > > + > > + // If the argument parsing failed or more than one argument was > > + // consumed, the -Xarch_ argument's parameter tried to consume > > + // extra arguments. Emit an error and ignore. > > + // > > + // We also want to disallow any options which would alter the > > + // driver behavior; that isn't going to work in our model. We > > + // use isDriverOption() as an approximation, although things > > + // like -O4 are going to slip through. 
> > + if (!XarchArg || Index > Prev + 1) { > > + getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args) > > + << A->getAsString(Args); > > + continue; > > + } else if (XarchArg->getOption().hasFlag(options::DriverOption)) { > > + getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver) > > + << A->getAsString(Args); > > + continue; > > + } > > + XarchArg->setBaseArg(A); > > + A = XarchArg.release(); > > + DAL->AddSynthesizedArg(A); > > + } > > + DAL->append(A); > > + } > > + > > + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), > BoundArch); > > + return DAL; > > +} > > + > > /// XCore tool chain > > XCore::XCore(const Driver &D, const llvm::Triple &Triple, const ArgList > &Args) > > : ToolChain(D, Triple, Args) { > > > > Modified: cfe/trunk/lib/Driver/ToolChains.h > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains.h?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/ToolChains.h (original) > > +++ cfe/trunk/lib/Driver/ToolChains.h Mon Jul 13 15:21:06 2015 > > @@ -699,6 +699,18 @@ private: > > std::string computeSysRoot() const; > > }; > > > > +class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux { > > +public: > > + CudaToolChain(const Driver &D, const llvm::Triple &Triple, > > + const llvm::opt::ArgList &Args); > > + > > + llvm::opt::DerivedArgList * > > + TranslateArgs(const llvm::opt::DerivedArgList &Args, > > + const char *BoundArch) const override; > > + void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, > > + llvm::opt::ArgStringList &CC1Args) const > override; > > +}; > > + > > class LLVM_LIBRARY_VISIBILITY Hexagon_TC : public Linux { > > protected: > > GCCVersion GCCLibAndIncVersion; > > > > Modified: cfe/trunk/lib/Driver/Tools.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > 
============================================================================== > > --- cfe/trunk/lib/Driver/Tools.cpp (original) > > +++ cfe/trunk/lib/Driver/Tools.cpp Mon Jul 13 15:21:06 2015 > > @@ -1488,6 +1488,12 @@ static std::string getCPUName(const ArgL > > return CPUName; > > } > > > > + case llvm::Triple::nvptx: > > + case llvm::Triple::nvptx64: > > + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) > > + return A->getValue(); > > + return ""; > > + > > case llvm::Triple::ppc: > > case llvm::Triple::ppc64: > > case llvm::Triple::ppc64le: { > > @@ -2826,8 +2832,14 @@ void Clang::ConstructJob(Compilation &C, > > getToolChain().getTriple().isWindowsCygwinEnvironment(); > > bool IsWindowsMSVC = > getToolChain().getTriple().isWindowsMSVCEnvironment(); > > > > - assert(Inputs.size() == 1 && "Unable to handle multiple inputs."); > > + // Check number of inputs for sanity. We need at least one input. > > + assert(Inputs.size() >= 1 && "Must have at least one input."); > > const InputInfo &Input = Inputs[0]; > > + // CUDA compilation may have multiple inputs (source file + results of > > + // device-side compilations). All other jobs are expected to have > exactly one > > + // input. > > + bool IsCuda = types::isCuda(Input.getType()); > > + assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple > inputs."); > > > > // Invoke ourselves in -cc1 mode. 
> > // > > @@ -4802,14 +4814,12 @@ void Clang::ConstructJob(Compilation &C, > > assert(Output.isNothing() && "Invalid output."); > > } > > > > - for (const auto &II : Inputs) { > > - addDashXForInput(Args, II, CmdArgs); > > + addDashXForInput(Args, Input, CmdArgs); > > > > - if (II.isFilename()) > > - CmdArgs.push_back(II.getFilename()); > > - else > > - II.getInputArg().renderAsInput(Args, CmdArgs); > > - } > > + if (Input.isFilename()) > > + CmdArgs.push_back(Input.getFilename()); > > + else > > + Input.getInputArg().renderAsInput(Args, CmdArgs); > > > > Args.AddAllArgs(CmdArgs, options::OPT_undef); > > > > @@ -4847,6 +4857,16 @@ void Clang::ConstructJob(Compilation &C, > > CmdArgs.push_back(SplitDwarfOut); > > } > > > > + // Host-side cuda compilation receives device-side outputs as > Inputs[1...]. > > + // Include them with -fcuda-include-gpubinary. > > + if (IsCuda && Inputs.size() > 1) > > + for (InputInfoList::const_iterator it = std::next(Inputs.begin()), > > + ie = Inputs.end(); > > + it != ie; ++it) { > > + CmdArgs.push_back("-fcuda-include-gpubinary"); > > + CmdArgs.push_back(it->getFilename()); > > + } > > + > > // Finally add the compile command to the compilation. 
> > if (Args.hasArg(options::OPT__SLASH_fallback) && > > Output.getType() == types::TY_Object && > > > > Modified: cfe/trunk/lib/Driver/Types.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Types.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Driver/Types.cpp (original) > > +++ cfe/trunk/lib/Driver/Types.cpp Mon Jul 13 15:21:06 2015 > > @@ -86,6 +86,7 @@ bool types::isAcceptedByClang(ID Id) { > > case TY_C: case TY_PP_C: > > case TY_CL: > > case TY_CUDA: case TY_PP_CUDA: > > + case TY_CUDA_DEVICE: > > case TY_ObjC: case TY_PP_ObjC: case TY_PP_ObjC_Alias: > > case TY_CXX: case TY_PP_CXX: > > case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias: > > @@ -122,7 +123,19 @@ bool types::isCXX(ID Id) { > > case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias: > > case TY_CXXHeader: case TY_PP_CXXHeader: > > case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: > > - case TY_CUDA: case TY_PP_CUDA: > > + case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: > > + return true; > > + } > > +} > > + > > +bool types::isCuda(ID Id) { > > + switch (Id) { > > + default: > > + return false; > > + > > + case TY_CUDA: > > + case TY_PP_CUDA: > > + case TY_CUDA_DEVICE: > > return true; > > } > > } > > @@ -206,10 +219,12 @@ void types::getCompilationPhases(ID Id, > > P.push_back(phases::Compile); > > P.push_back(phases::Backend); > > } > > - P.push_back(phases::Assemble); > > + if (Id != TY_CUDA_DEVICE) > > + P.push_back(phases::Assemble); > > } > > } > > - if (!onlyPrecompileType(Id)) { > > + > > + if (!onlyPrecompileType(Id) && Id != TY_CUDA_DEVICE) { > > P.push_back(phases::Link); > > } > > assert(0 < P.size() && "Not enough phases in list"); > > > > Modified: cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp > > URL: > 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp (original) > > +++ cfe/trunk/lib/Frontend/CreateInvocationFromCommandLine.cpp Mon Jul > 13 15:21:06 2015 > > @@ -15,6 +15,7 @@ > > #include "clang/Basic/DiagnosticOptions.h" > > #include "clang/Driver/Compilation.h" > > #include "clang/Driver/Driver.h" > > +#include "clang/Driver/Action.h" > > #include "clang/Driver/Options.h" > > #include "clang/Driver/Tool.h" > > #include "clang/Frontend/CompilerInstance.h" > > @@ -61,9 +62,21 @@ clang::createInvocationFromCommandLine(A > > } > > > > // We expect to get back exactly one command job, if we didn't > something > > - // failed. > > + // failed. CUDA compilation is an exception as it creates multiple > jobs. If > > + // that's the case, we proceed with the first job. If caller needs > particular > > + // CUDA job, it should be controlled via --cuda-{host|device}-only > option > > + // passed to the driver. 
> > const driver::JobList &Jobs = C->getJobs(); > > - if (Jobs.size() != 1 || !isa<driver::Command>(*Jobs.begin())) { > > + bool CudaCompilation = false; > > + if (Jobs.size() > 1) { > > + for (auto &A : C->getActions()) > > + if (isa<driver::CudaDeviceAction>(A)) { > > + CudaCompilation = true; > > + break; > > + } > > + } > > + if (Jobs.size() == 0 || !isa<driver::Command>(*Jobs.begin()) || > > + (Jobs.size() > 1 && !CudaCompilation)) { > > SmallString<256> Msg; > > llvm::raw_svector_ostream OS(Msg); > > Jobs.Print(OS, "; ", true); > > > > Added: cfe/trunk/test/Driver/cuda-options.cu > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-options.cu?rev=242058&view=auto > > > ============================================================================== > > --- cfe/trunk/test/Driver/cuda-options.cu (added) > > +++ cfe/trunk/test/Driver/cuda-options.cu Mon Jul 13 15:21:06 2015 > > @@ -0,0 +1,108 @@ > > +// Tests CUDA compilation pipeline construction in Driver. > > + > > +// Simple compilation case: > > +// RUN: %clang -### -c %s 2>&1 \ > > +// Compile device-side to PTX assembly and make sure we use it on the > host side. > > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > > +// Then compile host side and incorporate device code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Typical compilation + link case: > > +// RUN: %clang -### %s 2>&1 \ > > +// Compile device-side to PTX assembly and make sure we use it on the > host side > > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > > +// Then compile host side and incorporate device code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > > +// Then link things. 
> > +// RUN: -check-prefix CUDA-L %s > > + > > +// Verify that -cuda-no-device disables device-side compilation and > linking > > +// RUN: %clang -### --cuda-host-only %s 2>&1 \ > > +// Make sure we didn't run device-side compilation. > > +// RUN: | FileCheck -check-prefix CUDA-ND \ > > +// Then compile host side and make sure we don't attempt to incorporate > GPU code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-NI \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Verify that -cuda-no-host disables host-side compilation and linking > > +// RUN: %clang -### --cuda-device-only %s 2>&1 \ > > +// Compile device-side to PTX assembly > > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > > +// Make sure there are no host cmpilation or linking. > > +// RUN: -check-prefix CUDA-NH -check-prefix CUDA-NL %s > > + > > +// Verify that with -S we compile host and device sides to assembly > > +// and incorporate device code on the host side. > > +// RUN: %clang -### -S -c %s 2>&1 \ > > +// Compile device-side to PTX assembly > > +// RUN: | FileCheck -check-prefix CUDA-D1 \ > > +// Then compile host side and incorporate GPU code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Verify that --cuda-gpu-arch option passes correct GPU > > +// archtecture info to device compilation. > > +// RUN: %clang -### --cuda-gpu-arch=sm_35 -c %s 2>&1 \ > > +// Compile device-side to PTX assembly. > > +// RUN: | FileCheck -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \ > > +// Then compile host side and incorporate GPU code. > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Verify that there is device-side compilation per --cuda-gpu-arch args > > +// and that all results are included on the host side. 
> > +// RUN: %clang -### --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s > 2>&1 \ > > +// Compile both device-sides to PTX assembly > > +// RUN: | FileCheck \ > > +// RUN: -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \ > > +// RUN: -check-prefix CUDA-D2 -check-prefix CUDA-D2-SM30 \ > > +// Then compile host side and incorporate both device-side outputs > > +// RUN: -check-prefix CUDA-H -check-prefix CUDA-H-I1 -check-prefix > CUDA-H-I2 \ > > +// Make sure we don't link anything. > > +// RUN: -check-prefix CUDA-NL %s > > + > > +// Match device-side compilation > > +// CUDA-D1: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > > +// CUDA-D1-SAME: "-fcuda-is-device" > > +// CUDA-D1-SM35-SAME: "-target-cpu" "sm_35" > > +// CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]" > > +// CUDA-D1-SAME: "-x" "cuda" > > + > > +// Match another device-side compilation > > +// CUDA-D2: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > > +// CUDA-D2-SAME: "-fcuda-is-device" > > +// CUDA-D2-SM30-SAME: "-target-cpu" "sm_30" > > +// CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]" > > +// CUDA-D2-SAME: "-x" "cuda" > > + > > +// Match no device-side compilation > > +// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda" > > +// CUDA-ND-SAME-NOT: "-fcuda-is-device" > > + > > +// Match host-side compilation > > +// CUDA-H: "-cc1" "-triple" > > +// CUDA-H-SAME-NOT: "nvptx{{64?}}-nvidia-cuda" > > +// CUDA-H-SAME-NOT: "-fcuda-is-device" > > +// CUDA-H-SAME: "-o" "[[HOSTOBJ:[^"]*]]" > > +// CUDA-H-SAME: "-x" "cuda" > > +// CUDA-H-I1-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY1]]" > > +// CUDA-H-I2-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY2]]" > > + > > +// Match no GPU code inclusion. 
> > +// CUDA-H-NI-NOT: "-fcuda-include-gpubinary" > > + > > +// Match no CUDA compilation > > +// CUDA-NH-NOT: "-cc1" "-triple" > > +// CUDA-NH-SAME-NOT: "-x" "cuda" > > + > > +// Match linker > > +// CUDA-L: "{{.*}}ld{{(.exe)?}}" > > +// CUDA-L-SAME: "[[HOSTOBJ]]" > > + > > +// Match no linker > > +// CUDA-NL-NOT: "{{.*}}ld{{(.exe)?}}" > > > > Modified: cfe/trunk/test/Index/attributes-cuda.cu > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/attributes-cuda.cu?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/test/Index/attributes-cuda.cu (original) > > +++ cfe/trunk/test/Index/attributes-cuda.cu Mon Jul 13 15:21:06 2015 > > @@ -1,4 +1,6 @@ > > // RUN: c-index-test -test-load-source all -x cuda %s | FileCheck %s > > +// RUN: c-index-test -test-load-source all -x cuda --cuda-host-only %s > | FileCheck %s > > +// RUN: c-index-test -test-load-source all -x cuda --cuda-device-only > %s | FileCheck %s > > > > __attribute__((device)) void f_device(); > > __attribute__((global)) void f_global(); > > @@ -6,13 +8,13 @@ __attribute__((constant)) int* g_constan > > __attribute__((shared)) float *g_shared; > > __attribute__((host)) void f_host(); > > > > -// CHECK: attributes-cuda.cu:3:30: FunctionDecl=f_device:3:30 > > -// CHECK-NEXT: attributes-cuda.cu:3:16: attribute(device) > > -// CHECK: attributes-cuda.cu:4:30: FunctionDecl=f_global:4:30 > > -// CHECK-NEXT: attributes-cuda.cu:4:16: attribute(global) > > -// CHECK: attributes-cuda.cu:5:32: VarDecl=g_constant:5:32 > (Definition) > > -// CHECK-NEXT: attributes-cuda.cu:5:16: attribute(constant) > > -// CHECK: attributes-cuda.cu:6:32: VarDecl=g_shared:6:32 > (Definition) > > -// CHECK-NEXT: attributes-cuda.cu:6:16: attribute(shared) > > -// CHECK: attributes-cuda.cu:7:28: FunctionDecl=f_host:7:28 > > -// CHECK-NEXT: attributes-cuda.cu:7:16: attribute(host) > > +// CHECK: attributes-cuda.cu:5:30: 
FunctionDecl=f_device:5:30 > > +// CHECK-NEXT: attributes-cuda.cu:5:16: attribute(device) > > +// CHECK: attributes-cuda.cu:6:30: FunctionDecl=f_global:6:30 > > +// CHECK-NEXT: attributes-cuda.cu:6:16: attribute(global) > > +// CHECK: attributes-cuda.cu:7:32: VarDecl=g_constant:7:32 > (Definition) > > +// CHECK-NEXT: attributes-cuda.cu:7:16: attribute(constant) > > +// CHECK: attributes-cuda.cu:8:32: VarDecl=g_shared:8:32 > (Definition) > > +// CHECK-NEXT: attributes-cuda.cu:8:16: attribute(shared) > > +// CHECK: attributes-cuda.cu:9:28: FunctionDecl=f_host:9:28 > > +// CHECK-NEXT: attributes-cuda.cu:9:16: attribute(host) > > > > Added: cfe/trunk/test/Index/index-file.cu > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/index-file.cu?rev=242058&view=auto > > > ============================================================================== > > --- cfe/trunk/test/Index/index-file.cu (added) > > +++ cfe/trunk/test/Index/index-file.cu Mon Jul 13 15:21:06 2015 > > @@ -0,0 +1,9 @@ > > +// Make sure we can process CUDA file even if driver creates multiple > jobs > > +// RUN: c-index-test -test-load-source all %s | FileCheck %s > -check-prefix=CHECK-ANY > > +// Make sure we process correct side of cuda compilation > > +// RUN: c-index-test -test-load-source all --cuda-host-only %s | > FileCheck %s -check-prefix=CHECK-HOST > > +// RUN: c-index-test -test-load-source all --cuda-device-only %s | > FileCheck %s -check-prefix=CHECK-DEVICE > > + > > +// CHECK-ANY: macro definition=__cplusplus > > +// CHECK-HOST-NOT: macro definition=__CUDA_ARCH__ > > +// CHECK-DEVICE: macro definition=__CUDA_ARCH__ > > > > Modified: cfe/trunk/tools/libclang/CIndex.cpp > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/libclang/CIndex.cpp?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/tools/libclang/CIndex.cpp (original) > > +++ cfe/trunk/tools/libclang/CIndex.cpp Mon Jul 
13 15:21:06 2015 > > @@ -3102,6 +3102,12 @@ static void clang_parseTranslationUnit_I > > /*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies, > > /*UserFilesAreVolatile=*/true, ForSerialization, &ErrUnit)); > > > > + // Early failures in LoadFromCommandLine may return with ErrUnit > unset. > > + if (!Unit && !ErrUnit) { > > + PTUI->result = CXError_ASTReadError; > > + return; > > + } > > + > > if (NumErrors != Diags->getClient()->getNumErrors()) { > > // Make sure to check that 'Unit' is non-NULL. > > if (CXXIdx->getDisplayDiagnostics()) > > > > Modified: cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h > > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h?rev=242058&r1=242057&r2=242058&view=diff > > > ============================================================================== > > --- cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h (original) > > +++ cfe/trunk/unittests/ASTMatchers/ASTMatchersTest.h Mon Jul 13 > 15:21:06 2015 > > @@ -164,6 +164,7 @@ testing::AssertionResult matchesConditio > > std::vector<std::string> Args; > > Args.push_back("-xcuda"); > > Args.push_back("-fno-ms-extensions"); > > + Args.push_back("--cuda-host-only"); > > Args.push_back(CompileArg); > > if (!runToolOnCodeWithArgs(Factory->create(), > > CudaHeader + Code, Args)) { > > > > > > _______________________________________________ > > cfe-commits mailing list > > cfe-commits@cs.uiuc.edu > > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits > -- --Artem Belevich
_______________________________________________ cfe-commits mailing list cfe-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits