hliao updated this revision to Diff 269555. hliao added a comment. Revised following the reviewer's comments.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D81427/new/ https://reviews.llvm.org/D81427 Files: clang/lib/Driver/Driver.cpp clang/test/Driver/hip-device-only.hip clang/test/Driver/hip-rdc-device-only.hip
Index: clang/test/Driver/hip-device-only.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-device-only.hip @@ -1,144 +0,0 @@ -// REQUIRES: clang-driver -// REQUIRES: x86-registered-target -// REQUIRES: amdgpu-registered-target - -// RUN: %clang -### -target x86_64-linux-gnu \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ -// RUN: -c -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \ -// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITBC %s - -// With `-emit-llvm`, the output should be the same as the aforementioned line -// as `-fgpu-rdc` in HIP implies `-emit-llvm`. - -// RUN: %clang -### -target x86_64-linux-gnu \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ -// RUN: -c -emit-llvm -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \ -// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITBC %s - -// RUN: %clang -### -target x86_64-linux-gnu \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ -// RUN: -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \ -// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITLL %s - -// With `-emit-llvm`, the output should be the same as the aforementioned line -// as `-fgpu-rdc` in HIP implies `-emit-llvm`. - -// RUN: %clang -### -target x86_64-linux-gnu \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ -// RUN: -S -emit-llvm -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \ -// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITLL %s - -// With `-save-temps`, commane lines for each steps are dumped. 
For assembly -// output, there should be 3 steps (preprocessor, compile, and backend) per source -// and per target, 12 steps in total. - -// RUN: %clang -### -save-temps -target x86_64-linux-gnu \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ -// RUN: -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \ -// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck -check-prefix=SAVETEMP %s - -// COMMON: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" -// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// EMITBC-SAME: "-emit-llvm-bc" -// EMITLL-SAME: "-emit-llvm" -// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" -// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" -// COMMON-SAME: "-fapply-global-visibility-to-externs" -// EMITBC-SAME: {{.*}} "-o" {{"a.*bc"}} "-x" "hip" -// EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip" -// CHECK-SAME: {{.*}} {{".*a.cu"}} - -// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" -// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// EMITBC-SAME: "-emit-llvm-bc" -// EMITLL-SAME: "-emit-llvm" -// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" -// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" -// COMMON-SAME: "-fapply-global-visibility-to-externs" -// EMITBC-SAME: {{.*}} "-o" {{"a.*bc"}} "-x" "hip" -// EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip" -// COMMON-SAME: {{.*}} {{".*a.cu"}} - -// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" -// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// EMITBC-SAME: "-emit-llvm-bc" -// EMITLL-SAME: "-emit-llvm" -// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" -// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" -// COMMON-SAME: 
"-fapply-global-visibility-to-externs" -// EMITBC-SAME: {{.*}} "-o" {{"b.*bc"}} "-x" "hip" -// EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip" -// COMMON-SAME: {{.*}} {{".*b.hip"}} - -// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" -// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// EMITBC-SAME: "-emit-llvm-bc" -// EMITLL-SAME: "-emit-llvm" -// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" -// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" -// COMMON-SAME: "-fapply-global-visibility-to-externs" -// EMITBC-SAME: {{.*}} "-o" {{"b.*bc"}} "-x" "hip" -// EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip" -// COMMON-SAME: {{.*}} {{".*b.hip"}} - -// SAVETEMP: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-E" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" -// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX803_CUI:"a.*cui"]] "-x" "hip" {{".*a.cu"}} -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-emit-llvm-bc" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" -// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX803_TMP_BC:"a.*tmp.bc"]] "-x" "hip-cpp-output" [[A_GFX803_CUI]] -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-emit-llvm" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" -// SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX803_TMP_BC]] - -// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-E" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" -// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX900_CUI:"a.*cui"]] "-x" "hip" {{".*a.cu"}} -// SAVETEMP-NEXT: [[CLANG]] "-cc1" 
"-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-emit-llvm-bc" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" -// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX900_TMP_BC:"a.*tmp.bc"]] "-x" "hip-cpp-output" [[A_GFX900_CUI]] -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-emit-llvm" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" -// SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX900_TMP_BC]] - -// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-E" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" -// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX803_CUI:"b.*cui"]] "-x" "hip" {{".*b.hip"}} -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-emit-llvm-bc" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" -// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX803_TMP_BC:"b.*tmp.bc"]] "-x" "hip-cpp-output" [[B_GFX803_CUI]] -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-emit-llvm" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" -// SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX803_TMP_BC]] - -// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-E" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" -// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX900_CUI:"b.*cui"]] "-x" "hip" {{".*b.hip"}} -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-emit-llvm-bc" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} 
"-target-cpu" "gfx900" -// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX900_TMP_BC:"b.*tmp.bc"]] "-x" "hip-cpp-output" [[B_GFX900_CUI]] -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" -// SAVETEMP-SAME: "-emit-llvm" -// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" -// SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX900_TMP_BC]] Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -2705,9 +2705,7 @@ // backend and assemble phases to output LLVM IR. Except for generating // non-relocatable device coee, where we generate fat binary for device // code and pass to host in Backend phase. - if (CudaDeviceActions.empty() || - (CurPhase == phases::Backend && Relocatable) || - CurPhase == phases::Assemble) + if (CudaDeviceActions.empty()) return ABRT_Success; assert(((CurPhase == phases::Link && Relocatable) || @@ -2781,9 +2779,11 @@ } // By default, we produce an action for each device arch. - for (Action *&A : CudaDeviceActions) - A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A, - AssociatedOffloadKind); + if (!Relocatable || CurPhase <= phases::Backend) { + for (Action *&A : CudaDeviceActions) + A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A, + AssociatedOffloadKind); + } return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host : ABRT_Success; @@ -3668,7 +3668,10 @@ Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction<BackendJobAction>(Input, Output); } - if (Args.hasArg(options::OPT_emit_llvm)) { + if (Args.hasArg(options::OPT_emit_llvm) || + (TargetDeviceOffloadKind == Action::OFK_HIP && + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false))) { types::ID Output = Args.hasArg(options::OPT_S) ? 
types::TY_LLVM_IR : types::TY_LLVM_BC; return C.MakeAction<BackendJobAction>(Input, Output); @@ -4588,8 +4591,19 @@ // When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for // the unoptimized bitcode so that it does not get overwritten by the ".bc" // optimized bitcode output. - if (!AtTopLevel && C.getArgs().hasArg(options::OPT_emit_llvm) && - JA.getType() == types::TY_LLVM_BC) + auto IsHIPRDCInCompilePhase = [](const JobAction &JA, + const llvm::opt::DerivedArgList &Args) { + // The relocatable compilation in HIP implies -emit-llvm. Similarly, use a + // ".tmp.bc" suffix for the unoptimized bitcode (generated in the compile + // phase.) + return isa<CompileJobAction>(JA) && + JA.getOffloadingDeviceKind() == Action::OFK_HIP && + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false); + }; + if (!AtTopLevel && JA.getType() == types::TY_LLVM_BC && + (C.getArgs().hasArg(options::OPT_emit_llvm) || + IsHIPRDCInCompilePhase(JA, C.getArgs()))) Suffixed += ".tmp"; Suffixed += '.'; Suffixed += Suffix;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits