tra created this revision.
tra added a reviewer: echristo.
tra added a subscriber: cfe-commits.

  - added detection of libdevice bitcode files and an API to find the one
appropriate for the GPU we're compiling for.
  - added include paths to detected CUDA installation
  - added include paths for -aux-triple
  - added flags for linking in detected libdevice bitcode
  - added -nocudalib/-nocudainc to prevent automatic linking with libdevice or
adding the CUDA include path.
  - added test cases to verify new functionality



http://reviews.llvm.org/D13170

Files:
  include/clang/Driver/Driver.h
  include/clang/Driver/Options.td
  include/clang/Driver/ToolChain.h
  lib/Driver/ToolChain.cpp
  lib/Driver/ToolChains.cpp
  lib/Driver/ToolChains.h
  lib/Driver/Tools.cpp
  lib/Driver/Tools.h
  test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/.keep
  
test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
  
test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
  test/Driver/cuda-detect.cu

Index: test/Driver/cuda-detect.cu
===================================================================
--- test/Driver/cuda-detect.cu
+++ test/Driver/cuda-detect.cu
@@ -1,10 +1,56 @@
 // REQUIRES: clang-driver
 // REQUIRES: x86-registered-target
 //
+// # Check that we properly detect CUDA installation.
 // RUN: %clang -v --target=i386-unknown-linux \
 // RUN:   --sysroot=/tmp/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA
 // RUN: %clang -v --target=i386-unknown-linux \
+// RUN:   --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
+// RUN: %clang -v --target=i386-unknown-linux \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s
 
+// Make sure we map libdevice bitcode files to proper GPUs.
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_21 \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix LIBDEVICE -check-prefix LIBDEVICE21
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 \
+// RUN:   -check-prefix CUDAINC
+// Verify that -nocudainc prevents adding include path to CUDA headers.
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN:   -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 \
+// RUN:   -check-prefix NOCUDAINC
+
+// Verify that no options related to bitcode linking are passed if
+// there's no bitcode file.
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix NOLIBDEVICE
+// .. or if we explicitly passed -nocudalib
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN:   -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix NOLIBDEVICE
+
 // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
 // NOCUDA-NOT: Found CUDA installation:
+
+// LIBDEVICE: "-triple" "nvptx-nvidia-cuda"
+// LIBDEVICE-SAME: "-fcuda-is-device"
+// LIBDEVICE-SAME: "-mlink-bitcode-file"
+// LIBDEVICE21-SAME: libdevice.compute_20.10.bc
+// LIBDEVICE35-SAME: libdevice.compute_35.10.bc
+// LIBDEVICE-SAME: "-fcuda-uses-libdevice"
+// LIBDEVICE-SAME: "-target-feature" "+ptx42"
+// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA/usr/local/cuda/include"
+// NOCUDAINC-NOT: "-internal-isystem" "{{.*}}/Inputs/CUDA/usr/local/cuda/include"
+// LIBDEVICE-SAME: "-x" "cuda"
+
+// NOLIBDEVICE: "-triple" "nvptx-nvidia-cuda"
+// NOLIBDEVICE-SAME: "-fcuda-is-device"
+// NOLIBDEVICE-NOT: "-mlink-bitcode-file"
+// NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc
+// NOLIBDEVICE-NOT: "-fcuda-uses-libdevice"
+// NOLIBDEVICE-NOT: "-target-feature"
+// NOLIBDEVICE-SAME: "-x" "cuda"
Index: lib/Driver/Tools.h
===================================================================
--- lib/Driver/Tools.h
+++ lib/Driver/Tools.h
@@ -55,7 +55,8 @@
                                const Driver &D, const llvm::opt::ArgList &Args,
                                llvm::opt::ArgStringList &CmdArgs,
                                const InputInfo &Output,
-                               const InputInfoList &Inputs) const;
+                               const InputInfoList &Inputs,
+                               const char *AuxTriple) const;
 
   void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
                             llvm::opt::ArgStringList &CmdArgs) const;
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -248,7 +248,8 @@
                                     const Driver &D, const ArgList &Args,
                                     ArgStringList &CmdArgs,
                                     const InputInfo &Output,
-                                    const InputInfoList &Inputs) const {
+                                    const InputInfoList &Inputs,
+                                    const char *AuxTriple) const {
   Arg *A;
 
   CheckPreprocessingOptions(D, Args);
@@ -446,6 +447,16 @@
 
   // Add system include arguments.
   getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs);
+
+  // Add CUDA include arguments
+  if (types::isCuda(Inputs[0].getType())) {
+    getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
+    if (AuxTriple) {
+      const ToolChain &AuxTC = D.getToolChain(Args, llvm::Triple(AuxTriple));
+      AuxTC.AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
+      AuxTC.AddClangSystemIncludeArgs(Args, CmdArgs);
+    }
+  }
 }
 
 // FIXME: Move to target hook.
@@ -3202,9 +3213,12 @@
   CmdArgs.push_back("-triple");
   CmdArgs.push_back(Args.MakeArgString(TripleStr));
 
-  if (IsCuda && AuxTriple != nullptr) {
-    CmdArgs.push_back("-aux-triple");
-    CmdArgs.push_back(AuxTriple);
+  if (IsCuda) {
+    if (AuxTriple != nullptr) {
+      CmdArgs.push_back("-aux-triple");
+      CmdArgs.push_back(AuxTriple);
+    }
+    CmdArgs.push_back("-fcuda-target-overloads");
   }
 
   if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
@@ -3981,7 +3995,7 @@
   //
   // FIXME: Support -fpreprocessed
   if (types::getPreprocessedType(InputType) != types::TY_INVALID)
-    AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs);
+    AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs, AuxTriple);
 
   // Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes
   // that "The compiler can only warn and ignore the option if not recognized".
Index: lib/Driver/ToolChains.h
===================================================================
--- lib/Driver/ToolChains.h
+++ lib/Driver/ToolChains.h
@@ -165,6 +165,7 @@
     std::string CudaLibPath;
     std::string CudaLibDevicePath;
     std::string CudaIncludePath;
+    llvm::StringMap<std::string> CudaLibDeviceMap;
 
   public:
     CudaInstallationDetector() : IsValid(false) {}
@@ -185,6 +186,9 @@
     /// \brief Get the detected Cuda device library path.
     StringRef getLibDevicePath() const { return CudaLibDevicePath; }
     /// \brief Get libdevice file for given architecture
+    StringRef getLibDeviceFile(StringRef Gpu) const {
+      return CudaLibDeviceMap.lookup(Gpu);
+    }
   };
 
   CudaInstallationDetector CudaInstallation;
@@ -722,6 +726,8 @@
   void AddClangCXXStdlibIncludeArgs(
       const llvm::opt::ArgList &DriverArgs,
       llvm::opt::ArgStringList &CC1Args) const override;
+  void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                          llvm::opt::ArgStringList &CC1Args) const override;
   bool isPIEDefault() const override;
   SanitizerMask getSupportedSanitizers() const override;
 
Index: lib/Driver/ToolChains.cpp
===================================================================
--- lib/Driver/ToolChains.cpp
+++ lib/Driver/ToolChains.cpp
@@ -1512,6 +1512,31 @@
           llvm::sys::fs::exists(CudaLibDevicePath)))
       continue;
 
+    const StringRef LibDeviceName = "libdevice.";
+    std::error_code EC;
+    for (llvm::sys::fs::directory_iterator LI(CudaLibDevicePath, EC), LE;
+         !EC && LI != LE; LI = LI.increment(EC)) {
+      StringRef FilePath = LI->path();
+      StringRef FileName = llvm::sys::path::filename(FilePath);
+      // Process all bitcode filenames that look like libdevice.compute_XX.YY.bc
+      if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
+        continue;
+      StringRef GpuArch = FileName.slice(
+          LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
+      CudaLibDeviceMap[GpuArch] = FilePath.str();
+      // Insert map entries for specific devices with this compute capability.
+      if (GpuArch == "compute_20") {
+        CudaLibDeviceMap["sm_20"] = FilePath;
+        CudaLibDeviceMap["sm_21"] = FilePath;
+      } else if (GpuArch == "compute_30") {
+        CudaLibDeviceMap["sm_30"] = FilePath;
+        CudaLibDeviceMap["sm_32"] = FilePath;
+      } else if (GpuArch == "compute_35") {
+        CudaLibDeviceMap["sm_35"] = FilePath;
+        CudaLibDeviceMap["sm_37"] = FilePath;
+      }
+    }
+
     IsValid = true;
     break;
   }
@@ -3785,6 +3810,15 @@
   }
 }
 
+void Linux::AddCudaIncludeArgs(const ArgList &DriverArgs,
+                               ArgStringList &CC1Args) const {
+  if (DriverArgs.hasArg(options::OPT_nocudainc))
+    return;
+
+  if (CudaInstallation.isValid())
+    addSystemInclude(DriverArgs, CC1Args, CudaInstallation.getIncludePath());
+}
+
 bool Linux::isPIEDefault() const { return getSanitizerArgs().requiresPIE(); }
 
 SanitizerMask Linux::getSupportedSanitizers() const {
@@ -3855,6 +3889,23 @@
                                      llvm::opt::ArgStringList &CC1Args) const {
   Linux::addClangTargetOptions(DriverArgs, CC1Args);
   CC1Args.push_back("-fcuda-is-device");
+
+  if (DriverArgs.hasArg(options::OPT_nocudalib))
+    return;
+
+  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(
+      DriverArgs.getLastArgValue(options::OPT_march_EQ));
+  if (!LibDeviceFile.empty()) {
+    CC1Args.push_back("-mlink-bitcode-file");
+    CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
+    CC1Args.push_back("-fcuda-uses-libdevice");
+
+    // Libdevice in CUDA-7.0 requires PTX version that's more recent
+    // than LLVM defaults to. Use PTX4.2 which is the PTX version that
+    // came with CUDA-7.0.
+    CC1Args.push_back("-target-feature");
+    CC1Args.push_back("+ptx42");
+  }
 }
 
 llvm::opt::DerivedArgList *
Index: lib/Driver/ToolChain.cpp
===================================================================
--- lib/Driver/ToolChain.cpp
+++ lib/Driver/ToolChain.cpp
@@ -501,3 +501,6 @@
     Res |= CFIICall;
   return Res;
 }
+
+void ToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
+                                   ArgStringList &CC1Args) const {}
Index: include/clang/Driver/ToolChain.h
===================================================================
--- include/clang/Driver/ToolChain.h
+++ include/clang/Driver/ToolChain.h
@@ -350,6 +350,10 @@
   AddFastMathRuntimeIfAvailable(const llvm::opt::ArgList &Args,
                                 llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// \brief Add arguments to use system-specific CUDA includes.
+  virtual void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                                  llvm::opt::ArgStringList &CC1Args) const;
+
   /// \brief Return sanitizers which are available in this toolchain.
   virtual SanitizerMask getSupportedSanitizers() const;
 };
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1558,6 +1558,8 @@
 def no__dead__strip__inits__and__terms : Flag<["-"], "no_dead_strip_inits_and_terms">;
 def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option]>,
   HelpText<"Disable builtin #include directories">;
+def nocudainc : Flag<["-"], "nocudainc">;
+def nocudalib : Flag<["-"], "nocudalib">;
 def nodefaultlibs : Flag<["-"], "nodefaultlibs">;
 def nofixprebinding : Flag<["-"], "nofixprebinding">;
 def nolibc : Flag<["-"], "nolibc">;
Index: include/clang/Driver/Driver.h
===================================================================
--- include/clang/Driver/Driver.h
+++ include/clang/Driver/Driver.h
@@ -405,7 +405,6 @@
 
   bool IsUsingLTO(const llvm::opt::ArgList &Args) const;
 
-private:
   /// \brief Retrieves a ToolChain for a particular \p Target triple.
   ///
   /// Will cache ToolChains for the life of the driver object, and create them
@@ -415,6 +414,7 @@
 
   /// @}
 
+private:
   /// \brief Get bitmasks for which option flags to include and exclude based on
   /// the driver mode.
   std::pair<unsigned, unsigned> getIncludeExcludeOptionFlagMasks() const;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to