yaxunl updated this revision to Diff 339353. yaxunl marked an inline comment as done. yaxunl edited the summary of this revision. yaxunl added a comment.
Revised per Artem's comments. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D99233/new/ https://reviews.llvm.org/D99233 Files: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/hip-options.hip Index: clang/test/Driver/hip-options.hip =================================================================== --- clang/test/Driver/hip-options.hip +++ clang/test/Driver/hip-options.hip @@ -51,3 +51,8 @@ // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=CTA %s // CTA: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases" // CTA-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases" + +// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ +// RUN: --offload-arch=gfx906 -fgpu-inline-threshold=1000 %s 2>&1 | FileCheck -check-prefix=THRESH %s +// THRESH: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000" +// THRESH-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000" Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6473,6 +6473,16 @@ if (IsHIP) CmdArgs.push_back("-fcuda-allow-variadic-functions"); + if (IsCudaDevice || IsHIPDevice) { + StringRef InlineThresh = + Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ); + if (!InlineThresh.empty()) { + std::string ArgStr = + std::string("-inline-threshold=") + InlineThresh.str(); + CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)}); + } + } + // OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path // to specify the result of the compile phase on the host, so the meaningful // device declarations can be identified. 
Also, -fopenmp-is-device is passed Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -956,6 +956,9 @@ HelpText<"Default max threads per block for kernel launch bounds for HIP">, MarshallingInfoInt<LangOpts<"GPUMaxThreadsPerBlock">, "1024">, ShouldParseIf<hip.KeyPath>; +def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">, + Flags<[HelpHidden]>, + HelpText<"Inline threshold for device compilation for CUDA/HIP">; def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " "__cyg_profile_func_enter and __cyg_profile_func_exit">;
Index: clang/test/Driver/hip-options.hip =================================================================== --- clang/test/Driver/hip-options.hip +++ clang/test/Driver/hip-options.hip @@ -51,3 +51,8 @@ // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=CTA %s // CTA: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases" // CTA-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases" + +// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ +// RUN: --offload-arch=gfx906 -fgpu-inline-threshold=1000 %s 2>&1 | FileCheck -check-prefix=THRESH %s +// THRESH: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000" +// THRESH-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000" Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6473,6 +6473,16 @@ if (IsHIP) CmdArgs.push_back("-fcuda-allow-variadic-functions"); + if (IsCudaDevice || IsHIPDevice) { + StringRef InlineThresh = + Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ); + if (!InlineThresh.empty()) { + std::string ArgStr = + std::string("-inline-threshold=") + InlineThresh.str(); + CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)}); + } + } + // OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path // to specify the result of the compile phase on the host, so the meaningful // device declarations can be identified. 
Also, -fopenmp-is-device is passed Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -956,6 +956,9 @@ HelpText<"Default max threads per block for kernel launch bounds for HIP">, MarshallingInfoInt<LangOpts<"GPUMaxThreadsPerBlock">, "1024">, ShouldParseIf<hip.KeyPath>; +def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">, + Flags<[HelpHidden]>, + HelpText<"Inline threshold for device compilation for CUDA/HIP">; def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " "__cyg_profile_func_enter and __cyg_profile_func_exit">;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits