llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) <details> <summary>Changes</summary> - [HIP] Make the HIP default architecture use the enum value - [Offload] Move HIP and CUDA to new driver by default --- Patch is 115.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84420.diff 50 Files Affected: - (modified) clang/include/clang/Basic/Cuda.h (+1-1) - (modified) clang/lib/Driver/Driver.cpp (+4-4) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+6-4) - (modified) clang/test/Driver/cl-offload.cu (+2-3) - (modified) clang/test/Driver/cuda-arch-translation.cu (+13-13) - (modified) clang/test/Driver/cuda-bad-arch.cu (+2-2) - (modified) clang/test/Driver/cuda-bindings.cu (+12-12) - (modified) clang/test/Driver/cuda-external-tools.cu (+7-7) - (modified) clang/test/Driver/cuda-options.cu (+12-11) - (modified) clang/test/Driver/cuda-output-asm.cu (-4) - (modified) clang/test/Driver/cuda-phases.cu (+12-12) - (modified) clang/test/Driver/cuda-version-check.cu (+3-3) - (modified) clang/test/Driver/hip-binding.hip (+5-5) - (modified) clang/test/Driver/hip-cuid-hash.hip (+4-4) - (modified) clang/test/Driver/hip-cuid.hip (+5) - (modified) clang/test/Driver/hip-dependent-options.hip (+2-2) - (modified) clang/test/Driver/hip-device-compile.hip (+11-11) - (modified) clang/test/Driver/hip-gz-options.hip (-1) - (modified) clang/test/Driver/hip-invalid-target-id.hip (+2-2) - (modified) clang/test/Driver/hip-link-bc-to-bc.hip (+2-2) - (modified) clang/test/Driver/hip-link-bundle-archive.hip (+11-11) - (modified) clang/test/Driver/hip-link-save-temps.hip (+4-4) - (modified) clang/test/Driver/hip-link-shared-library.hip (+1-2) - (modified) clang/test/Driver/hip-link-static-library.hip (+3-3) - (modified) clang/test/Driver/hip-macros.hip (-3) - (modified) clang/test/Driver/hip-offload-arch.hip (+1-1) - (modified) clang/test/Driver/hip-offload-compress-zlib.hip (+3-3) - (modified) clang/test/Driver/hip-offload-compress-zstd.hip (+3-3) - (modified) clang/test/Driver/hip-options.hip (+2-6) - (modified) clang/test/Driver/hip-output-file-name.hip (+13-13) - (modified) clang/test/Driver/hip-partial-link.hip (+7-7) - (modified) clang/test/Driver/hip-phases.hip (+53-53) - (modified) clang/test/Driver/hip-rdc-device-only.hip (+7-7) - (modified) clang/test/Driver/hip-sanitize-options.hip (+1-1) - (modified) clang/test/Driver/hip-save-temps.hip (+6-6) - (modified) clang/test/Driver/hip-target-id.hip (+5-5) - (modified) clang/test/Driver/hip-toolchain-device-only.hip (-4) - (modified) clang/test/Driver/hip-toolchain-features.hip (+12-12) - (modified) clang/test/Driver/hip-toolchain-mllvm.hip (-2) - (modified) clang/test/Driver/hip-toolchain-rdc-separate.hip (+4-4) - (modified) clang/test/Driver/hip-toolchain-rdc-static-lib.hip (+1-1) - (modified) clang/test/Driver/hip-toolchain-rdc.hip (+2-2) - (modified) clang/test/Driver/hip-unbundle-preproc.hipi (+3-3) - (modified) clang/test/Driver/hipspv-toolchain-rdc.hip (+1-1) - (modified) clang/test/Driver/hipspv-toolchain.hip (+1-1) - (modified) clang/test/Driver/invalid-offload-options.cpp (+1-1) - (modified) clang/test/Driver/lto.cu (+11-11) - (modified) clang/test/Driver/thinlto.cu (+2-2) - (modified) clang/test/Preprocessor/cuda-preprocess.cu (+4-4) - (modified) clang/unittests/Tooling/ToolingTest.cpp (+3-3) ``````````diff diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 916cb4b7ef34a7..3e77a74c7c0092 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -123,7 +123,7 @@ enum class CudaArch { LAST, CudaDefault = CudaArch::SM_52, - HIPDefault = CudaArch::GFX803, + HIPDefault = CudaArch::GFX906, }; static inline bool IsNVIDIAGpuArch(CudaArch A) { diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 96e6ad77f5e50d..e85a3e675408e4 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3373,7 +3373,7 @@ class OffloadingActionBuilder final { const Driver::InputList &Inputs) : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) { - DefaultCudaArch = CudaArch::GFX906; + DefaultCudaArch = CudaArch::HIPDefault; if (Args.hasArg(options::OPT_fhip_emit_relocatable, options::OPT_fno_hip_emit_relocatable)) { @@ -4115,9 +4115,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, handleArguments(C, Args, Inputs, Actions); bool UseNewOffloadingDriver = - C.isOffloadingHostKind(Action::OFK_OpenMP) || + C.getActiveOffloadKinds() != Action::OFK_None && Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false); + options::OPT_no_offload_new_driver, true); // Builder to be used to build offloading actions. std::unique_ptr<OffloadingActionBuilder> OffloadBuilder = @@ -4802,7 +4802,7 @@ Action *Driver::ConstructPhaseAction( offloadDeviceOnly() || (TargetDeviceOffloadKind == Action::OFK_HIP && !Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false))) + options::OPT_no_offload_new_driver, true))) ? types::TY_LLVM_IR : types::TY_LLVM_BC; return C.MakeAction<BackendJobAction>(Input, Output); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index fa17f6295d6ea7..9605fb28e5fe34 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4687,8 +4687,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, bool IsHostOffloadingAction = JA.isHostOffloading(Action::OFK_OpenMP) || (JA.isHostOffloading(C.getActiveOffloadKinds()) && + C.getActiveOffloadKinds() != Action::OFK_None && Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)); + options::OPT_no_offload_new_driver, true)); bool IsRDCMode = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false); @@ -4997,7 +4998,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (IsUsingLTO) { if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) && !Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false) && + options::OPT_no_offload_new_driver, true) && !Triple.isAMDGPU()) { D.Diag(diag::err_drv_unsupported_opt_for_target) << Args.getLastArg(options::OPT_foffload_lto, @@ -6521,8 +6522,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } // Forward the new driver to change offloading code generation. - if (Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) + if (C.getActiveOffloadKinds() != Action::OFK_None && + Args.hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, true)) CmdArgs.push_back("--offload-new-driver"); SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType); diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu index b05bf3b97b7eb7..8f1200f1733597 100644 --- a/clang/test/Driver/cl-offload.cu +++ b/clang/test/Driver/cl-offload.cu @@ -18,11 +18,10 @@ // CUDA-SAME: "-Weverything" // CUDA: link -// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa" -// HIP-SAME: "-Weverything" // HIP: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-pc-windows-msvc" // HIP-SAME: "-Weverything" -// HIP: {{lld.* "-flavor" "gnu" "-m" "elf64_amdgpu"}} +// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa" +// HIP-SAME: "-Weverything" // HIP: {{link.* "amdhip64.lib"}} // CMake uses this option when finding packages for HIP, so diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu index f37964d87c66a1..757defec959c47 100644 --- a/clang/test/Driver/cuda-arch-translation.cu +++ b/clang/test/Driver/cuda-arch-translation.cu @@ -69,19 +69,19 @@ // HIP: clang-offload-bundler -// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20 -// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20 -// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30 -// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32 -// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35 -// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37 -// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50 -// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52 -// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53 -// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60 -// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61 -// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62 -// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70 +// SM20:--image=profile=sm_20{{.*}} +// SM21:--image=profile=sm_21{{.*}} +// SM30:--image=profile=sm_30{{.*}} +// SM32:--image=profile=sm_32{{.*}} +// SM35:--image=profile=sm_35{{.*}} +// SM37:--image=profile=sm_37{{.*}} +// SM50:--image=profile=sm_50{{.*}} +// SM52:--image=profile=sm_52{{.*}} +// SM53:--image=profile=sm_53{{.*}} +// SM60:--image=profile=sm_60{{.*}} +// SM61:--image=profile=sm_61{{.*}} +// SM62:--image=profile=sm_62{{.*}} +// SM70:--image=profile=sm_70{{.*}} // GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600 // GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601 // GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602 diff --git a/clang/test/Driver/cuda-bad-arch.cu b/clang/test/Driver/cuda-bad-arch.cu index 877b20bc9351bc..35a56a8bef0f0d 100644 --- a/clang/test/Driver/cuda-bad-arch.cu +++ b/clang/test/Driver/cuda-bad-arch.cu @@ -30,9 +30,9 @@ // RUN: | FileCheck -check-prefix OK %s // We don't allow using NVPTX/AMDGCN for host compilation. -// RUN: not %clang -### --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \ +// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \ // RUN: | FileCheck -check-prefix HOST_NVPTX %s -// RUN: not %clang -### --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \ +// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \ // RUN: | FileCheck -check-prefix HOST_AMDGCN %s // OK-NOT: error: Unsupported CUDA gpu architecture diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu index e130e08f20152a..3b95aa4bd50541 100644 --- a/clang/test/Driver/cuda-bindings.cu +++ b/clang/test/Driver/cuda-bindings.cu @@ -26,14 +26,14 @@ // BIN-NOT: cuda-bindings-device-cuda-nvptx64 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: // BIN-NOT: cuda-bindings-device-cuda-nvptx64 -// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out" +// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out" // // Test single gpu architecture up to the assemble phase. // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM %s -// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" +// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[BINDINGS:.+.s]]" // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" // @@ -64,8 +64,8 @@ // BIN2-NOT: cuda-bindings-device-cuda-nvptx64 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: // BIN2-NOT: cuda-bindings-device-cuda-nvptx64 -// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out" -// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out" +// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out" +// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out" // .. same, but with -fsyntax-only // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ @@ -81,9 +81,9 @@ // RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \ // RUN: | FileCheck -check-prefix=SYN %s // SYN-NOT: inputs: -// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing) -// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) +// SYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) +// SYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing) // SYN-NOT: inputs // .. and with --offload-new-driver @@ -100,7 +100,7 @@ // RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \ // RUN: | FileCheck -check-prefix=NDSYN %s // NDSYN-NOT: inputs: -// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) +// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing) // NDSYN-NOT: inputs: @@ -112,8 +112,8 @@ // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM2 %s -// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" -// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s" +// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+.s]]" +// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+.s]]" // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" // @@ -128,7 +128,7 @@ // RUN: | FileCheck -check-prefix=HBIN %s // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: // HBIN-NOT: cuda-bindings-device-cuda-nvptx64 -// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out" +// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out" // // Test one or more gpu architecture up to the assemble phase in host-only @@ -166,7 +166,7 @@ // Test two gpu architectures with complete compilation in device-only // compilation mode. // -// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ +// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \ // RUN: | FileCheck -check-prefix=DBIN2 %s // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: @@ -180,7 +180,7 @@ // Test two gpu architectures up to the assemble phase in device-only // compilation mode. // -// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ +// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \ // RUN: | FileCheck -check-prefix=DASM2 %s // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" diff --git a/clang/test/Driver/cuda-external-tools.cu b/clang/test/Driver/cuda-external-tools.cu index 1aa87cc09982c6..946e144fce38fb 100644 --- a/clang/test/Driver/cuda-external-tools.cu +++ b/clang/test/Driver/cuda-external-tools.cu @@ -25,7 +25,7 @@ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s // Generating relocatable device code // RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \ -// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // With debugging enabled, ptxas should be run with with no ptxas optimizations. @@ -59,7 +59,7 @@ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s // Separate compilation targeting sm_35. // RUN: %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \ -// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // 32-bit compile. @@ -68,7 +68,7 @@ // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s // 32-bit compile when generating relocatable device code. // RUN: %clang -### --target=i386-linux-gnu -fgpu-rdc -c %s 2>&1 \ -// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s // Compile with -fintegrated-as. This should still cause us to invoke ptxas. @@ -77,7 +77,7 @@ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s // Check that we still pass -c when generating relocatable device code. // RUN: %clang -### --target=x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \ -// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // Check -Xcuda-ptxas and -Xcuda-fatbinary @@ -99,13 +99,13 @@ // Check relocatable device code generation on MacOS. // RUN: %clang -### --target=x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \ -// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // RUN: %clang -### --target=x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \ -// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s // RUN: %clang -### --target=i386-apple-macosx -fgpu-rdc -c %s 2>&1 \ -// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s // Check that CLANG forwards the -v flag to PTXAS. diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu index 8999a6618fe1fa..3aef694b56f496 100644 --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -4,13 +4,13 @@ // Simple compilation case. Compile device-side to PTX assembly and make sure // we use it on the host side. -// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \ +// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ // RUN: -check-prefix NOLINK %s // Typical compilation + link case. -// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \ +// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ // RUN: -check-prefix LINK %s @@ -35,7 +35,7 @@ // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \ -// RUN: --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \ +// RUN: --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s @@ -50,27 +50,27 @@ // RUN: -check-prefix NOHOST -check-prefix NOLINK %s // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \ -// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \ +// RUN: --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ // RUN: -check-prefix LINK %s // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \ -// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \ +// RUN: --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ // RUN: -check-prefix LINK %s // Verify that --cuda-gpu-arch option passes the correct GPU architecture to // device compilation. -// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \ +// RUN: %clang -### --cuda-include-ptx=all -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix DEVICE-SM52 -check-prefix HOST \ // RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s // Verify that there is one device-side compilation per --cuda-gpu-arch args // and that all results are included on the host side. -// RUN: %clang -### --target=x86_64-linux-gnu \ +// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \ // RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \ // RUN: -check-prefixes DEVICE-SM52,DEVICE2-SM60 \ @@ -130,9 +130,9 @@ // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ // RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \ -// RUN: --no-cuda-gpu-arch=all \ +// RUN: --no-cuda-version-check --no-cuda-gpu-arch=all \ // RUN: --cuda-gpu-arch=sm_70 \ -// RUN: -c -nogpulib -nogpuinc %s 2>&1 \ +// RUN: -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s // g) There's no --cuda-gpu-arch=all @@ -143,7 +143,7 @@ // Verify that --[no-]cuda-include-ptx arguments are handled correctly. -// a) by default we're including PTX for all GPUs. +// a) by default we're not including PTX for all GPUs. // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \ // RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \ // RUN: -c %s 2>&1 \ @@ -185,7 +185,8 @@ // Verify -flto=thin -fwhole-program-vtables handling. This should result in // both options being passed to the host compilation, with neither passed to // the device compilation. -// RUN: %clang -... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/84420 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits