https://github.com/jplehr updated https://github.com/llvm/llvm-project/pull/75468
>From 8f381c760fca8a4abd7550c492ff22fa8972933a Mon Sep 17 00:00:00 2001 From: JP Lehr <janpatrick.l...@amd.com> Date: Thu, 6 Jul 2023 16:47:21 -0400 Subject: [PATCH 1/3] [OpenMP] Introduce -fopenmp-force-usm flag The new flag implements logic to include #pragma omp requires unified_shared_memory in every translation unit. This enables a straightforward way to enable USM for an application without the need to modify sources. --- clang/include/clang/Driver/Options.td | 2 ++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 ++++++++++++++++ clang/lib/Headers/CMakeLists.txt | 1 + .../lib/Headers/openmp_wrappers/usm/force_usm.h | 6 ++++++ 4 files changed, 25 insertions(+) create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2b93ddf033499c..e33bc7d1b10d71 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm="> Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; +def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>, + Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; //===----------------------------------------------------------------------===// // Shared cc1 + fc1 OpenMP Target Options diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index b012b7cb729378..a077f2f06d7728 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { void 
AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); + + CC1Args.push_back("-internal-isystem"); + SmallString<128> P(HostTC.getDriver().ResourceDir); + llvm::sys::path::append(P, "include/cuda_wrappers"); + CC1Args.push_back(DriverArgs.MakeArgString(P)); + + // Force USM mode will forcefully include #pragma omp requires + // unified_shared_memory via the force_usm header + // XXX This may result in a compilation error if the source + // file already includes that pragma. + if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) { + CC1Args.push_back("-include"); + CC1Args.push_back( + DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir + + "/include/openmp_wrappers/force_usm.h")); + } } void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 735e4e4e3be89b..ed491779abcd00 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -320,6 +320,7 @@ set(openmp_wrapper_files openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/complex_cmath.h openmp_wrappers/new + openmp_wrappers/usm/force_usm.h ) set(llvm_libc_wrapper_files diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h new file mode 100644 index 00000000000000..15c394e27ce9c2 --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h @@ -0,0 +1,6 @@ +#ifndef __CLANG_FORCE_OPENMP_USM +#define __CLANG_FORCE_OPENMP_USM + +#pragma omp requires unified_shared_memory + +#endif >From 4d5a1f670b3bdd5b183515e347610414cb12cb90 Mon Sep 17 00:00:00 2001 From: JP Lehr <janpatrick.l...@amd.com> Date: Fri, 29 Dec 2023 04:33:19 -0500 Subject: [PATCH 2/3] Revert "[OpenMP] Introduce -fopenmp-force-usm flag" This reverts commit 4ecd07d786a5a994b33b9177d4e21d839bfe3fc9. To test the other solution. 
--- clang/include/clang/Driver/Options.td | 2 -- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 ---------------- clang/lib/Headers/CMakeLists.txt | 1 - .../lib/Headers/openmp_wrappers/usm/force_usm.h | 6 ------ 4 files changed, 25 deletions(-) delete mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e33bc7d1b10d71..2b93ddf033499c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3381,8 +3381,6 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm="> Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; -def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>, - Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; //===----------------------------------------------------------------------===// // Shared cc1 + fc1 OpenMP Target Options diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index a077f2f06d7728..b012b7cb729378 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -129,22 +129,6 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); - - CC1Args.push_back("-internal-isystem"); - SmallString<128> P(HostTC.getDriver().ResourceDir); - llvm::sys::path::append(P, "include/cuda_wrappers"); - CC1Args.push_back(DriverArgs.MakeArgString(P)); - - // Force USM mode will forcefully include #pragma omp requires - // unified_shared_memory 
via the force_usm header - // XXX This may result in a compilation error if the source - // file already includes that pragma. - if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) { - CC1Args.push_back("-include"); - CC1Args.push_back( - DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir + - "/include/openmp_wrappers/force_usm.h")); - } } void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index ed491779abcd00..735e4e4e3be89b 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -320,7 +320,6 @@ set(openmp_wrapper_files openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/complex_cmath.h openmp_wrappers/new - openmp_wrappers/usm/force_usm.h ) set(llvm_libc_wrapper_files diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h deleted file mode 100644 index 15c394e27ce9c2..00000000000000 --- a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __CLANG_FORCE_OPENMP_USM -#define __CLANG_FORCE_OPENMP_USM - -#pragma omp requires unified_shared_memory - -#endif >From f0aaefbe923d2daa1752f3a9664dab3958346c51 Mon Sep 17 00:00:00 2001 From: JP Lehr <janpatrick.l...@amd.com> Date: Fri, 29 Dec 2023 04:32:24 -0500 Subject: [PATCH 3/3] [OpenMP] Implicit USM Clause Solution This uses an implicitly added OpenMP USM Clause when initializing SEMA to enforce the use of USM. 
--- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 4 ++++ clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 ++++++ clang/lib/Driver/ToolChains/Clang.cpp | 2 ++ 4 files changed, 13 insertions(+) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 21abc346cf17ac..81cf2ad9498a7f 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -260,6 +260,7 @@ LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have mo LANGOPT(OpenMPNoThreadState , 1, 0, "Assume that no thread in a parallel region will modify an ICV.") LANGOPT(OpenMPNoNestedParallelism , 1, 0, "Assume that no thread in a parallel region will encounter a parallel region") LANGOPT(OpenMPOffloadMandatory , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.") +LANGOPT(OpenMPForceUSM , 1, 0, "Enable OpenMP unified shared memory mode via compiler.") LANGOPT(NoGPULib , 1, 0, "Indicate a build without the standard GPU libraries.") LANGOPT(RenderScript , 1, 0, "RenderScript") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2b93ddf033499c..28290da438c62d 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3451,6 +3451,10 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group< Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>, HelpText<"Do not create a host fallback if offloading to the device fails.">, MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>; +def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>, + Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>, + HelpText<"Force behavior as if the user specified pragma omp requires unified_shared_memory.">, + MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>; def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, 
Group<f_Group>, Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CLOption]>, HelpText<"Emit code that can be JIT compiled for OpenMP offloading. Implies -foffload-lto=full">; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index ea6645a39e8321..09204c30175f64 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1044,6 +1044,12 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) ? CGM.getLangOpts().OMPHostIRFile : StringRef{}); OMPBuilder.setConfig(Config); + + // The user forces the compiler to behave as if omp requires unified_shared_memory was given. + if (CGM.getLangOpts().OpenMPForceUSM) { + HasRequiresUnifiedSharedMemory = true; + OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); + } } void CGOpenMPRuntime::clear() { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index acfa119805068d..ffc24201ab2e0b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6382,6 +6382,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); if (Args.hasArg(options::OPT_fopenmp_offload_mandatory)) CmdArgs.push_back("-fopenmp-offload-mandatory"); + if (Args.hasArg(options::OPT_fopenmp_force_usm)) + CmdArgs.push_back("-fopenmp-force-usm"); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits