hliao updated this revision to Diff 269555.
hliao added a comment.

Revise following reviewer's comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81427/new/

https://reviews.llvm.org/D81427

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/hip-device-only.hip
  clang/test/Driver/hip-rdc-device-only.hip

Index: clang/test/Driver/hip-device-only.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/hip-device-only.hip
@@ -1,144 +0,0 @@
-// REQUIRES: clang-driver
-// REQUIRES: x86-registered-target
-// REQUIRES: amdgpu-registered-target
-
-// RUN: %clang -### -target x86_64-linux-gnu \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -c -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
-// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITBC %s
-
-// With `-emit-llvm`, the output should be the same as the aforementioned line
-// as `-fgpu-rdc` in HIP implies `-emit-llvm`.
-
-// RUN: %clang -### -target x86_64-linux-gnu \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -c -emit-llvm -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
-// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITBC %s
-
-// RUN: %clang -### -target x86_64-linux-gnu \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
-// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITLL %s
-
-// With `-emit-llvm`, the output should be the same as the aforementioned line
-// as `-fgpu-rdc` in HIP implies `-emit-llvm`.
-
-// RUN: %clang -### -target x86_64-linux-gnu \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -S -emit-llvm -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
-// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITLL %s
-
-// With `-save-temps`, command lines for each step are dumped. For assembly
-// output, there should be 3 steps (preprocessor, compile, and backend) per
-// source and per target, 12 steps in total.
-
-// RUN: %clang -### -save-temps -target x86_64-linux-gnu \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
-// RUN:   -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
-// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck -check-prefix=SAVETEMP %s
-
-// COMMON: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// EMITBC-SAME: "-emit-llvm-bc"
-// EMITLL-SAME: "-emit-llvm"
-// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
-// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
-// COMMON-SAME: "-fapply-global-visibility-to-externs"
-// EMITBC-SAME: {{.*}} "-o" {{"a.*bc"}} "-x" "hip"
-// EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip"
-// COMMON-SAME: {{.*}} {{".*a.cu"}}
-
-// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// EMITBC-SAME: "-emit-llvm-bc"
-// EMITLL-SAME: "-emit-llvm"
-// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
-// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
-// COMMON-SAME: "-fapply-global-visibility-to-externs"
-// EMITBC-SAME: {{.*}} "-o" {{"a.*bc"}} "-x" "hip"
-// EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip"
-// COMMON-SAME: {{.*}} {{".*a.cu"}}
-
-// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// EMITBC-SAME: "-emit-llvm-bc"
-// EMITLL-SAME: "-emit-llvm"
-// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
-// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
-// COMMON-SAME: "-fapply-global-visibility-to-externs"
-// EMITBC-SAME: {{.*}} "-o" {{"b.*bc"}} "-x" "hip"
-// EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip"
-// COMMON-SAME: {{.*}} {{".*b.hip"}}
-
-// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// EMITBC-SAME: "-emit-llvm-bc"
-// EMITLL-SAME: "-emit-llvm"
-// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
-// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
-// COMMON-SAME: "-fapply-global-visibility-to-externs"
-// EMITBC-SAME: {{.*}} "-o" {{"b.*bc"}} "-x" "hip"
-// EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip"
-// COMMON-SAME: {{.*}} {{".*b.hip"}}
-
-// SAVETEMP: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-E"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
-// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX803_CUI:"a.*cui"]] "-x" "hip" {{".*a.cu"}}
-// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-emit-llvm-bc"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
-// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX803_TMP_BC:"a.*tmp.bc"]] "-x" "hip-cpp-output" [[A_GFX803_CUI]]
-// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-emit-llvm"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
-// SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX803_TMP_BC]]
-
-// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-E"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
-// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX900_CUI:"a.*cui"]] "-x" "hip" {{".*a.cu"}}
-// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-emit-llvm-bc"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
-// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX900_TMP_BC:"a.*tmp.bc"]] "-x" "hip-cpp-output" [[A_GFX900_CUI]]
-// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-emit-llvm"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
-// SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX900_TMP_BC]]
-
-// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-E"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
-// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX803_CUI:"b.*cui"]] "-x" "hip" {{".*b.hip"}}
-// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-emit-llvm-bc"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
-// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX803_TMP_BC:"b.*tmp.bc"]] "-x" "hip-cpp-output" [[B_GFX803_CUI]]
-// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-emit-llvm"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
-// SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX803_TMP_BC]]
-
-// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-E"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
-// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX900_CUI:"b.*cui"]] "-x" "hip" {{".*b.hip"}}
-// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-emit-llvm-bc"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
-// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX900_TMP_BC:"b.*tmp.bc"]] "-x" "hip-cpp-output" [[B_GFX900_CUI]]
-// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
-// SAVETEMP-SAME: "-emit-llvm"
-// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
-// SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX900_TMP_BC]]
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -2705,9 +2705,7 @@
       // backend and assemble phases to output LLVM IR. Except for generating
       // non-relocatable device coee, where we generate fat binary for device
       // code and pass to host in Backend phase.
-      if (CudaDeviceActions.empty() ||
-          (CurPhase == phases::Backend && Relocatable) ||
-          CurPhase == phases::Assemble)
+      if (CudaDeviceActions.empty())
         return ABRT_Success;
 
       assert(((CurPhase == phases::Link && Relocatable) ||
@@ -2781,9 +2779,11 @@
       }
 
       // By default, we produce an action for each device arch.
-      for (Action *&A : CudaDeviceActions)
-        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
-                                               AssociatedOffloadKind);
+      if (!Relocatable || CurPhase <= phases::Backend) {
+        for (Action *&A : CudaDeviceActions)
+          A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
+                                                 AssociatedOffloadKind);
+      }
 
       return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host
                                                            : ABRT_Success;
@@ -3668,7 +3668,10 @@
           Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
     }
-    if (Args.hasArg(options::OPT_emit_llvm)) {
+    if (Args.hasArg(options::OPT_emit_llvm) ||
+        (TargetDeviceOffloadKind == Action::OFK_HIP &&
+         Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
+                      false))) {
       types::ID Output =
           Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
@@ -4588,8 +4591,19 @@
     // When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for
     // the unoptimized bitcode so that it does not get overwritten by the ".bc"
     // optimized bitcode output.
-    if (!AtTopLevel && C.getArgs().hasArg(options::OPT_emit_llvm) &&
-        JA.getType() == types::TY_LLVM_BC)
+    auto IsHIPRDCInCompilePhase = [](const JobAction &JA,
+                                     const llvm::opt::DerivedArgList &Args) {
+      // The relocatable compilation in HIP implies -emit-llvm. Similarly, use a
+      // ".tmp.bc" suffix for the unoptimized bitcode (generated in the compile
+      // phase).
+      return isa<CompileJobAction>(JA) &&
+             JA.getOffloadingDeviceKind() == Action::OFK_HIP &&
+             Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
+                          false);
+    };
+    if (!AtTopLevel && JA.getType() == types::TY_LLVM_BC &&
+        (C.getArgs().hasArg(options::OPT_emit_llvm) ||
+         IsHIPRDCInCompilePhase(JA, C.getArgs())))
       Suffixed += ".tmp";
     Suffixed += '.';
     Suffixed += Suffix;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to