[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-29 Thread Jan Patrick Lehr via cfe-commits

https://github.com/jplehr closed https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-29 Thread Jan Patrick Lehr via cfe-commits

jplehr wrote:

Hmm.. I guess I screwed something up with git and the history.

https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-29 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff 3c92011b600bdf70424e2547594dd461fe411a41 \
  f0aaefbe923d2daa1752f3a9664dab3958346c51 -- \
  clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/Driver/ToolChains/Clang.cpp
```





View the diff from clang-format here.


```diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 09204c3017..4855e7410a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1045,7 +1045,8 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule )
  : StringRef{});
   OMPBuilder.setConfig(Config);
 
-  // The user forces the compiler to behave as if omp requires unified_shared_memory was given.
+  // The user forces the compiler to behave as if omp requires
+  // unified_shared_memory was given.
   if (CGM.getLangOpts().OpenMPForceUSM) {
 HasRequiresUnifiedSharedMemory = true;
 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);

```




https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-29 Thread Jan Patrick Lehr via cfe-commits

https://github.com/jplehr updated 
https://github.com/llvm/llvm-project/pull/75468

>From 8f381c760fca8a4abd7550c492ff22fa8972933a Mon Sep 17 00:00:00 2001
From: JP Lehr 
Date: Thu, 6 Jul 2023 16:47:21 -0400
Subject: [PATCH 1/3] [OpenMP] Introduce -fopenmp-force-usm flag

The new flag implements logic to include #pragma omp requires
unified_shared_memory in every translation unit.
This enables a straightforward way to enable USM for an application
without the need to modify sources.
---
 clang/include/clang/Driver/Options.td|  2 ++
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 
 clang/lib/Headers/CMakeLists.txt |  1 +
 .../lib/Headers/openmp_wrappers/usm/force_usm.h  |  6 ++
 4 files changed, 25 insertions(+)
 create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 2b93ddf033499c..e33bc7d1b10d71 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 
 
//===--===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..a077f2f06d7728 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force USM mode will forcefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  // XXX This may result in a compilation error if the source
+  // file already includes that pragma.
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));
+  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList ,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 735e4e4e3be89b..ed491779abcd00 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -320,6 +320,7 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
+  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h 
b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif

>From 4d5a1f670b3bdd5b183515e347610414cb12cb90 Mon Sep 17 00:00:00 2001
From: JP Lehr 
Date: Fri, 29 Dec 2023 04:33:19 -0500
Subject: [PATCH 2/3] Revert "[OpenMP] Introduce -fopenmp-force-usm flag"

This reverts commit 4ecd07d786a5a994b33b9177d4e21d839bfe3fc9.

To test the other solution.
---
 clang/include/clang/Driver/Options.td|  2 --
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 
 clang/lib/Headers/CMakeLists.txt |  1 -
 .../lib/Headers/openmp_wrappers/usm/force_usm.h  |  6 --
 4 files changed, 25 deletions(-)
 delete mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index e33bc7d1b10d71..2b93ddf033499c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,8 +3381,6 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   

[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-15 Thread Jan Patrick Lehr via cfe-commits

https://github.com/jplehr updated 
https://github.com/llvm/llvm-project/pull/75468

>From 4ecd07d786a5a994b33b9177d4e21d839bfe3fc9 Mon Sep 17 00:00:00 2001
From: JP Lehr 
Date: Thu, 6 Jul 2023 16:47:21 -0400
Subject: [PATCH] [OpenMP] Introduce -fopenmp-force-usm flag

The new flag implements logic to include #pragma omp requires
unified_shared_memory in every translation unit.
This enables a straightforward way to enable USM for an application
without the need to modify sources.
---
 clang/include/clang/Driver/Options.td|  2 ++
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 
 clang/lib/Headers/CMakeLists.txt |  1 +
 .../lib/Headers/openmp_wrappers/usm/force_usm.h  |  6 ++
 4 files changed, 25 insertions(+)
 create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1b02087425b751..73325d5620cc10 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 
 
//===--===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..a077f2f06d7728 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force USM mode will forcefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  // XXX This may result in a compilation error if the source
+  // file already includes that pragma.
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));
+  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList ,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f8fdd402777e48..aac232fa8b4405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -319,6 +319,7 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
+  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h 
b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread Joseph Huber via cfe-commits


@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>;

jhuber6 wrote:

No, it would just override the flag before it. E.g. `-fopenmp-force-usm 
-fno-openmp-force-usm` would return to not having it on.

https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread Jan Patrick Lehr via cfe-commits


@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>;

jplehr wrote:

With the intent to remove the USM behavior from a codebase that has the 
requires pragma, by basically just ignoring it?

https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread Joseph Huber via cfe-commits


@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>;

jhuber6 wrote:

`-f` options tend to have a `-fno` variant as well.

https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread Jan Patrick Lehr via cfe-commits


@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force USM mode will forcefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  // XXX This may result in a compilation error if the source
+  // file already includes that pragma.
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));

jplehr wrote:

I'm happy to change that to something more reasonable, if you can point out 
where to look for inspiration on how to do it properly.

https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread Joseph Huber via cfe-commits


@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force USM mode will forcefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  // XXX This may result in a compilation error if the source
+  // file already includes that pragma.
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));

jhuber6 wrote:

Here's the patch for `-fopenmp-offload-mandatory` which is a similar use-case 
https://reviews.llvm.org/D120353.

https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread Joseph Huber via cfe-commits


@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force USM mode will forcefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  // XXX This may result in a compilation error if the source
+  // file already includes that pragma.
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));

jhuber6 wrote:

I don't think this is a good way to handle this. We should make this a CC1 
argument, forward it in the standard way, and make `CGOpenMPRuntime` always 
emit the associated runtime call.

Also note that I'm planning on removing the current "requires" handling because 
emitting spurious global constructors into the runtime is difficult to work 
around.

https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread Jan Patrick Lehr via cfe-commits

https://github.com/jplehr updated 
https://github.com/llvm/llvm-project/pull/75468

>From 9809ba1ec31cb1a4a066f709ae8bd3e965e1 Mon Sep 17 00:00:00 2001
From: JP Lehr 
Date: Thu, 6 Jul 2023 16:47:21 -0400
Subject: [PATCH] [OpenMP] Introduce -fopenmp-force-usm flag

The new flag implements logic to include #pragma omp requires
unified_shared_memory in every translation unit.
This enables a straightforward way to enable USM for an application
without the need to modify sources.
---
 clang/include/clang/Driver/Options.td|  2 ++
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 
 clang/lib/Headers/CMakeLists.txt |  1 +
 .../lib/Headers/openmp_wrappers/usm/force_usm.h  |  6 ++
 4 files changed, 25 insertions(+)
 create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1b02087425b751..b9cd3043a13a9a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>;
 
 
//===--===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..a077f2f06d7728 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force USM mode will forcefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  // XXX This may result in a compilation error if the source
+  // file already includes that pragma.
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));
+  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList ,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f8fdd402777e48..aac232fa8b4405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -319,6 +319,7 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
+  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h 
b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Jan Patrick Lehr (jplehr)


Changes

The new flag implements logic to include `#pragma omp requires 
unified_shared_memory` in every translation unit.
This enables a straightforward way to enable USM for an application without the 
need to modify sources.

This is the flag mentioned in https://github.com/llvm/llvm-project/pull/75467
Once the test has landed, I'll rebase and enable the test with this patch.

---
Full diff: https://github.com/llvm/llvm-project/pull/75468.diff


4 Files Affected:

- (modified) clang/include/clang/Driver/Options.td (+2) 
- (modified) clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp (+14) 
- (modified) clang/lib/Headers/CMakeLists.txt (+1) 
- (added) clang/lib/Headers/openmp_wrappers/usm/force_usm.h (+6) 


``diff
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1b02087425b751..b9cd3043a13a9a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>;
 
 
//===--===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..2484a59085c276 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,20 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force APU mode will focefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));
+  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList ,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f8fdd402777e48..aac232fa8b4405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -319,6 +319,7 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
+  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h 
b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif

``




https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-driver

Author: Jan Patrick Lehr (jplehr)


Changes

The new flag implements logic to include `#pragma omp requires 
unified_shared_memory` in every translation unit.
This enables a straightforward way to enable USM for an application without the 
need to modify sources.

This is the flag mentioned in https://github.com/llvm/llvm-project/pull/75467
Once the test has landed, I'll rebase and enable the test with this patch.

---
Full diff: https://github.com/llvm/llvm-project/pull/75468.diff


4 Files Affected:

- (modified) clang/include/clang/Driver/Options.td (+2) 
- (modified) clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp (+14) 
- (modified) clang/lib/Headers/CMakeLists.txt (+1) 
- (added) clang/lib/Headers/openmp_wrappers/usm/force_usm.h (+6) 


``diff
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1b02087425b751..b9cd3043a13a9a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>;
 
 
//===--===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..2484a59085c276 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,20 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force APU mode will focefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));
+  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList ,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f8fdd402777e48..aac232fa8b4405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -319,6 +319,7 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
+  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h 
b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif

``




https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Jan Patrick Lehr (jplehr)


Changes

The new flag implements logic to include `#pragma omp requires 
unified_shared_memory` in every translation unit.
This enables a straightforward way to enable USM for an application without the 
need to modify sources.

This is the flag mentioned in https://github.com/llvm/llvm-project/pull/75467
Once the test has landed, I'll rebase and enable the test with this patch.

---
Full diff: https://github.com/llvm/llvm-project/pull/75468.diff


4 Files Affected:

- (modified) clang/include/clang/Driver/Options.td (+2) 
- (modified) clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp (+14) 
- (modified) clang/lib/Headers/CMakeLists.txt (+1) 
- (added) clang/lib/Headers/openmp_wrappers/usm/force_usm.h (+6) 


``diff
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1b02087425b751..b9cd3043a13a9a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>;
 
 
//===--===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..2484a59085c276 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,20 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force APU mode will focefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));
+  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList ,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f8fdd402777e48..aac232fa8b4405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -319,6 +319,7 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
+  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h 
b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif

``




https://github.com/llvm/llvm-project/pull/75468
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

2023-12-14 Thread Jan Patrick Lehr via cfe-commits

https://github.com/jplehr created 
https://github.com/llvm/llvm-project/pull/75468

The new flag implements logic to include `#pragma omp requires 
unified_shared_memory` in every translation unit.
This enables a straightforward way to enable USM for an application without the 
need to modify sources.

This is the flag mentioned in https://github.com/llvm/llvm-project/pull/75467
Once that test has landed, I'll rebase and enable it with this patch.

>From bc912bf0a63e6d10b60655d26846731d961021f3 Mon Sep 17 00:00:00 2001
From: JP Lehr 
Date: Thu, 6 Jul 2023 16:47:21 -0400
Subject: [PATCH] [OpenMP] Introduce -fopenmp-force-usm flag

The new flag implements logic to include #pragma omp requires
unified_shared_memory in every translation unit.
This enables a straightforward way to enable USM for an application
without the need to modify sources.
---
 clang/include/clang/Driver/Options.td |  2 ++
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp  | 14 ++
 clang/lib/Headers/CMakeLists.txt  |  1 +
 clang/lib/Headers/openmp_wrappers/usm/force_usm.h |  6 ++
 4 files changed, 23 insertions(+)
 create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1b02087425b751..b9cd3043a13a9a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], 
"fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], 
"fopenmp-cuda-teams-reduction-recs-num=">, Group,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[CC1Option]>;
 
 
//===--===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..2484a59085c276 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,20 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList 
) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
 const ArgList , ArgStringList ) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force APU mode will forcefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+CC1Args.push_back("-include");
+CC1Args.push_back(
+DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));
+  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList ,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f8fdd402777e48..aac232fa8b4405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -319,6 +319,7 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
+  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h 
b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits