[PATCH] D13144: [CUDA] propagate to CUDA sub-compilations target triple of opposite side.

Artem Belevich via cfe-commits Thu, 24 Sep 2015 13:05:42 -0700

tra created this revision.
tra added a reviewer: echristo.
tra added a subscriber: cfe-commits.


Propagates AuxTriple through job pipeline construction in the driver and on
to Tool::ConstructJob().
ConstructJob() in turn passes the target triple of the opposite side of the
CUDA compilation to each sub-compilation via the -aux-triple option.


http://reviews.llvm.org/D13144

Files:
  include/clang/Driver/Driver.h
  include/clang/Driver/Tool.h
  lib/Driver/Driver.cpp
  lib/Driver/Tools.cpp
  lib/Driver/Tools.h
  test/Driver/cuda-options.cu
  test/SemaCUDA/function-target-hd.cu

Index: test/SemaCUDA/function-target-hd.cu
===================================================================
--- test/SemaCUDA/function-target-hd.cu
+++ test/SemaCUDA/function-target-hd.cu
@@ -8,9 +8,9 @@
 // host device functions are not allowed to call device functions.
 
 // RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -verify %s
 // RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
 
 #include "Inputs/cuda.h"
 
Index: test/Driver/cuda-options.cu
===================================================================
--- test/Driver/cuda-options.cu
+++ test/Driver/cuda-options.cu
@@ -111,14 +111,6 @@
 // Make sure we don't link anything.
 // RUN:   -check-prefix CUDA-NL %s
 
-// Match device-side preprocessor, and compiler phases with -save-temps
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda"
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
-
 // --cuda-host-only should never trigger unused arg warning.
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only -c %s 2>&1 | \
 // RUN:    FileCheck -check-prefix CUDA-NO-UNUSED-CHO %s
@@ -133,34 +125,47 @@
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only -x c -c %s 2>&1 | \
 // RUN:    FileCheck -check-prefix CUDA-UNUSED-CDO %s
 
+// Match device-side preprocessor, and compiler phases with -save-temps
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda"
+
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
+
 // Match the job that produces PTX assembly
-// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-D1: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1NS-SAME: "-aux-triple" "x86_64--linux-gnu"
 // CUDA-D1-SAME: "-fcuda-is-device"
 // CUDA-D1-SM35-SAME: "-target-cpu" "sm_35"
 // CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]"
 // CUDA-D1NS-SAME: "-x" "cuda"
 // CUDA-D1S-SAME: "-x" "ir"
 
-// Match anothe device-side compilation
-// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// Match another device-side compilation
+// CUDA-D2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D2-SAME: "-aux-triple" "x86_64--linux-gnu"
 // CUDA-D2-SAME: "-fcuda-is-device"
 // CUDA-D2-SM30-SAME: "-target-cpu" "sm_30"
 // CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
 // CUDA-D2-SAME: "-x" "cuda"
 
 // Match no device-side compilation
-// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-ND-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // CUDA-ND-SAME-NOT: "-fcuda-is-device"
 
 // Match host-side preprocessor job with -save-temps
-// CUDA-HS: "-cc1" "-triple"
-// CUDA-HS-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-HS: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-HS-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 // CUDA-HS-SAME-NOT: "-fcuda-is-device"
 // CUDA-HS-SAME: "-x" "cuda"
 
 // Match host-side compilation
-// CUDA-H: "-cc1" "-triple"
-// CUDA-H-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-H: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-H-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 // CUDA-H-SAME-NOT: "-fcuda-is-device"
 // CUDA-H-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
 // CUDA-HNS-SAME: "-x" "cuda"
Index: lib/Driver/Tools.h
===================================================================
--- lib/Driver/Tools.h
+++ lib/Driver/Tools.h
@@ -106,8 +106,15 @@
 
   void ConstructJob(Compilation &C, const JobAction &JA,
                     const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs, const char *LinkingOutput,
+                    const char *AuxTriple) const override;
+
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
                     const llvm::opt::ArgList &TCArgs,
-                    const char *LinkingOutput) const override;
+                    const char *LinkingOutput) const override {
+    llvm_unreachable("Should never be called");
+  };
 };
 
 /// \brief Clang integrated assembler tool.
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -3169,7 +3169,8 @@
 
 void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                          const InputInfo &Output, const InputInfoList &Inputs,
-                         const ArgList &Args, const char *LinkingOutput) const {
+                         const ArgList &Args, const char *LinkingOutput,
+                         const char *AuxTriple) const {
   std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
   const llvm::Triple Triple(TripleStr);
 
@@ -3201,6 +3202,11 @@
   CmdArgs.push_back("-triple");
   CmdArgs.push_back(Args.MakeArgString(TripleStr));
 
+  if (IsCuda && AuxTriple != nullptr) {
+    CmdArgs.push_back("-aux-triple");
+    CmdArgs.push_back(AuxTriple);
+  }
+
   if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
                                Triple.getArch() == llvm::Triple::thumb)) {
     unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -1762,21 +1762,22 @@
 void Driver::BuildJobsForAction(Compilation &C, const Action *A,
                                 const ToolChain *TC, const char *BoundArch,
                                 bool AtTopLevel, bool MultipleArchs,
-                                const char *LinkingOutput,
-                                InputInfo &Result) const {
+                                const char *LinkingOutput, InputInfo &Result,
+                                const char *AuxTriple) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
 
   InputInfoList CudaDeviceInputInfos;
   if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
     InputInfo II;
     // Append outputs of device jobs to the input list.
     for (const Action *DA : CHA->getDeviceActions()) {
-      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
+      BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel,
                          /*MultipleArchs*/ false, LinkingOutput, II);
       CudaDeviceInputInfos.push_back(II);
     }
     // Override current action with a real host compile action and continue
     // processing it.
+    AuxTriple = CHA->getDeviceTriple();
     A = *CHA->begin();
   }
 
@@ -1806,16 +1807,20 @@
       TC = &C.getDefaultToolChain();
 
     BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel,
-                       MultipleArchs, LinkingOutput, Result);
+                       MultipleArchs, LinkingOutput, Result, AuxTriple);
     return;
   }
 
   if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
+    // Initial processing of CudaDeviceAction carries host params.
+    // Call BuildJobsForAction() again, now with correct device parameters.
+    assert(CDA->getGpuArchName() && "No GPU name in device action.");
     BuildJobsForAction(
         C, *CDA->begin(),
         &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())),
         CDA->getGpuArchName(), CDA->isAtTopLevel(),
-        /*MultipleArchs*/ true, LinkingOutput, Result);
+        /*MultipleArchs*/ true, LinkingOutput, Result,
+        TC->getTriple().str().c_str());
     return;
   }
 
@@ -1834,9 +1839,10 @@
     InputInfo II;
     for (const Action *DA : CollapsedCHA->getDeviceActions()) {
       BuildJobsForAction(C, DA, TC, "", AtTopLevel,
-                         /*MultipleArchs*/ false, LinkingOutput, II);
+                         /*MultipleArchs*/ false, LinkingOutput, II, AuxTriple);
       CudaDeviceInputInfos.push_back(II);
     }
+    AuxTriple = CollapsedCHA->getDeviceTriple();
   }
 
   // Only use pipes when there is exactly one input.
@@ -1851,7 +1857,7 @@
 
     InputInfo II;
     BuildJobsForAction(C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs,
-                       LinkingOutput, II);
+                       LinkingOutput, II, AuxTriple);
     InputInfos.push_back(II);
   }
 
@@ -1886,7 +1892,8 @@
     llvm::errs() << "], output: " << Result.getAsString() << "\n";
   } else {
     T->ConstructJob(C, *JA, Result, InputInfos,
-                    C.getArgsForToolChain(TC, BoundArch), LinkingOutput);
+                    C.getArgsForToolChain(TC, BoundArch), LinkingOutput,
+                    AuxTriple);
   }
 }
 
Index: include/clang/Driver/Tool.h
===================================================================
--- include/clang/Driver/Tool.h
+++ include/clang/Driver/Tool.h
@@ -124,11 +124,20 @@
   /// tool chain specific translations applied.
   /// \param LinkingOutput - If this output will eventually feed the
   /// linker, then this is the final output name of the linked image.
+  /// \param AuxTriple - Additional triple to pass during CUDA compilation.
   virtual void ConstructJob(Compilation &C, const JobAction &JA,
                             const InputInfo &Output,
                             const InputInfoList &Inputs,
                             const llvm::opt::ArgList &TCArgs,
                             const char *LinkingOutput) const = 0;
+
+  /// \brief Default implementation for classes that don't care about AuxTriple.
+  virtual void
+  ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output,
+               const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs,
+               const char *LinkingOutput, const char *AuxTriple) const {
+    ConstructJob(C, JA, Output, Inputs, TCArgs, LinkingOutput);
+  }
 };
 
 } // end namespace driver
Index: include/clang/Driver/Driver.h
===================================================================
--- include/clang/Driver/Driver.h
+++ include/clang/Driver/Driver.h
@@ -369,7 +369,8 @@
                           bool AtTopLevel,
                           bool MultipleArchs,
                           const char *LinkingOutput,
-                          InputInfo &Result) const;
+                          InputInfo &Result,
+                          const char *AuxTriple = nullptr) const;
 
   /// Returns the default name for linked images (e.g., "a.out").
   const char *getDefaultImageName() const;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D13144: [CUDA] propagate to CUDA sub-compilations target triple of opposite side.

Reply via email to