llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Jan Patrick Lehr (jplehr) <details> <summary>Changes</summary> The new flag implements logic to include `#pragma omp requires unified_shared_memory` in every translation unit. This provides a straightforward way to enable USM for an application without the need to modify sources. This is the flag mentioned in https://github.com/llvm/llvm-project/pull/75467 Once the test has landed, I'll rebase and enable it with this patch. --- Full diff: https://github.com/llvm/llvm-project/pull/75468.diff 4 Files Affected: - (modified) clang/include/clang/Driver/Options.td (+2) - (modified) clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp (+14) - (modified) clang/lib/Headers/CMakeLists.txt (+1) - (added) clang/lib/Headers/openmp_wrappers/usm/force_usm.h (+6) ``````````diff diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1b02087425b751..b9cd3043a13a9a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm="> Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; +def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>, + Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>; //===----------------------------------------------------------------------===// // Shared cc1 + fc1 OpenMP Target Options diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index b012b7cb729378..2484a59085c276 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -129,6 +129,20 @@ 
AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); + + CC1Args.push_back("-internal-isystem"); + SmallString<128> P(HostTC.getDriver().ResourceDir); + llvm::sys::path::append(P, "include/cuda_wrappers"); + CC1Args.push_back(DriverArgs.MakeArgString(P)); + + // Force APU mode will focefully include #pragma omp requires + // unified_shared_memory via the force_usm header + if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) { + CC1Args.push_back("-include"); + CC1Args.push_back( + DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir + + "/include/openmp_wrappers/force_usm.h")); + } } void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index f8fdd402777e48..aac232fa8b4405 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -319,6 +319,7 @@ set(openmp_wrapper_files openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/complex_cmath.h openmp_wrappers/new + openmp_wrappers/usm/force_usm.h ) set(llvm_libc_wrapper_files diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h new file mode 100644 index 00000000000000..15c394e27ce9c2 --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h @@ -0,0 +1,6 @@ +#ifndef __CLANG_FORCE_OPENMP_USM +#define __CLANG_FORCE_OPENMP_USM + +#pragma omp requires unified_shared_memory + +#endif `````````` </details> https://github.com/llvm/llvm-project/pull/75468 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits