[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-08 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 closed this revision.
jhuber6 added a comment.

Screwed up and put the wrong revision in the commit message. Closed in 
rGd564409946a5a13cb6391fc0fec54dcbd6f6d249 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-08 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

Nice patch. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-07 Thread Ye Luo via Phabricator via cfe-commits
ye-luo accepted this revision.
ye-luo added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-07 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

@ye-luo good?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-07 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 296792.
jhuber6 added a comment.

Implementing Ye's code. I changed it to putput the architecture as a dependency 
and then set the value where we define the default architecture. I did a full 
build from scratch using this and got the sm_70 libraries for my machine 
without needing to specify it in my build script.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

Files:
  clang/CMakeLists.txt
  openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt


Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,14 @@
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
-  # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
-  set(default_capabilities 35)
+  # Build library support for the highest compute capability the system 
supports
+  # and always build support for sm_35 by default
+  if (${LIBOMPTARGET_DEP_CUDA_ARCH} EQUAL 35)
+set(default_capabilities 35)
+  else()
+  set(default_capabilities "35,${LIBOMPTARGET_DEP_CUDA_ARCH}")
+  endif()
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
 set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
 libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is 
deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
Index: openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
===
--- openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
+++ openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
@@ -117,6 +117,18 @@
 endif()
 find_package(CUDA QUIET)
 
+# Try to get the highest Nvidia GPU architecture the system supports
+if (CUDA_FOUND)
+  cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH_OUTPUT ${CUDA_ARCH_FLAGS})
+  if (NOT DEFINED CUDA_ARCH_MATCH_OUTPUT OR "${CMAKE_MATCH_1}" LESS 35)
+libomptarget_warning_say("Setting Nvidia GPU architecture support for 
OpenMP target runtime library to sm_35 by default")
+set(LIBOMPTARGET_DEP_CUDA_ARCH "35")
+  else()
+set(LIBOMPTARGET_DEP_CUDA_ARCH "${CMAKE_MATCH_1}")
+  endif()
+endif()
+
 set(LIBOMPTARGET_DEP_CUDA_FOUND ${CUDA_FOUND})
 set(LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
 
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -305,13 +305,26 @@
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the atomicMax instruction on
 # 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
-  "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH 
"${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
-  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Try to find the highest Nvidia GPU architecture the system supports
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+  find_package(CUDA QUIET)
+  if (CUDA_FOUND)
+cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  endif()
+else()
+  set(CUDA_ARCH_FLAGS ${CLANG_OPENMP_NVPTX_DEFAULT_ARCH})
+endif()
+
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH_MATCH OR "${CMAKE_MATCH_1}" LESS 35)
   set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
 "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_35")
+else()
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH_MATCH} CACHE STRING
+"Default architecture for OpenMP offloading to Nvidia GPUs.")
 endif()
 
 set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")


Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,14 @@
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
-  # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
-  set(default_capabilities 35)
+  # Build library support for the highest compute capability the system supports
+  # and always bui

[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt:80
+  else()
+list(APPEND compute_capabilities ${CMAKE_MATCH_1})
+  endif()

jhuber6 wrote:
> ye-luo wrote:
> > 1. Doesn't work right now. Missing comma ",${CMAKE_MATCH_1}"
> > 2. using CUDA_ARCH as "string(REGEX MATCH" output causes problems.
> > 3. "append" needs to protect redundant 35.
> > 4. I think it is better to move this part of logic to 
> > `openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake`
> > after `find_package(CUDA QUIET)`.
> 1. I noticed the comma problem, it's because the compute capabilities uses 
> commas instead of semicolons like the rest of CMake to delimit the values. 
> There's another weird error I'm getting while trying to build with this where 
> it will add `sm_70` as an argument causing an nvcc error. like in `nvcc -o 
> out file.cpp sm_70`.
> 2. What's the problem here? Pretty much the output gives some string that you 
> would pass to nvcc like `--arch=sm_70` and I'm regexing out the `sm_70` if 
> there was no match or the architecture is too small it doesn't add anything.
> 3. I'm thinking it would just be easiest to change it do something like 
> `set(default_capabilities "35,${CMAKE_MATCH_1}")`
> 4. My feeling is that there's not enough complexity here to justify moving it 
> since I'd need to add just as much code to generate the output and then even 
> more to use it here. Since the LibomptargetGetDependencies.cmake doesn't 
> bother checking whether or not the `find_package(CUDA)` was successful I feel 
> like there's no need to add logic that requires checking if it was there when 
> we already have it here.
3. better than append but still need a check to avoid "35,35"
4. 
https://github.com/ye-luo/llvm-project/commit/ac5f20f9770e894ff48467a9317ec0649f5c7562
libomptarget part should be fulling working.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt:92
   foreach(sm ${nvptx_sm_list})
 set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
   endforeach()

my point 2 refers to here CUDA_ARCH which gets into the compile line, your 
point 1 issue. rename your output variable ot CUDA_ARCH_MATCH_OUTPUT should 
solve the problem.

I still think it is better to move default_capabilities. Very natural to have 
cuda_select_nvcc_arch_flags next to find_package(cuda) in one place.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt:80
+  else()
+list(APPEND compute_capabilities ${CMAKE_MATCH_1})
+  endif()

ye-luo wrote:
> 1. Doesn't work right now. Missing comma ",${CMAKE_MATCH_1}"
> 2. using CUDA_ARCH as "string(REGEX MATCH" output causes problems.
> 3. "append" needs to protect redundant 35.
> 4. I think it is better to move this part of logic to 
> `openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake`
> after `find_package(CUDA QUIET)`.
1. I noticed the comma problem, it's because the compute capabilities uses 
commas instead of semicolons like the rest of CMake to delimit the values. 
There's another weird error I'm getting while trying to build with this where 
it will add `sm_70` as an argument causing an nvcc error. like in `nvcc -o out 
file.cpp sm_70`.
2. What's the problem here? Pretty much the output gives some string that you 
would pass to nvcc like `--arch=sm_70` and I'm regexing out the `sm_70` if 
there was no match or the architecture is too small it doesn't add anything.
3. I'm thinking it would just be easiest to change it do something like 
`set(default_capabilities "35,${CMAKE_MATCH_1}")`
4. My feeling is that there's not enough complexity here to justify moving it 
since I'd need to add just as much code to generate the output and then even 
more to use it here. Since the LibomptargetGetDependencies.cmake doesn't bother 
checking whether or not the `find_package(CUDA)` was successful I feel like 
there's no need to add logic that requires checking if it was there when we 
already have it here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo requested changes to this revision.
ye-luo added inline comments.
This revision now requires changes to proceed.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt:80
+  else()
+list(APPEND compute_capabilities ${CMAKE_MATCH_1})
+  endif()

1. Doesn't work right now. Missing comma ",${CMAKE_MATCH_1}"
2. using CUDA_ARCH as "string(REGEX MATCH" output causes problems.
3. "append" needs to protect redundant 35.
4. I think it is better to move this part of logic to 
`openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake`
after `find_package(CUDA QUIET)`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 296577.
jhuber6 added a comment.

Removing redundant call to `find_package`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

Files:
  clang/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt


Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,19 @@
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
-  # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
+  # Always build with compute capability sm_35 as a fallback and the highest
+  # architecture the system supports by default
   set(default_capabilities 35)
+  if (NOT DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
+cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+  libomptarget_warning_say("Setting Nvidia GPU architecture support for 
OpenMP target runtime library to sm_35 by default")
+else()
+  list(APPEND default_capabilities ${CMAKE_MATCH_1})
+endif()
+  endif()
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
 set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
 libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is 
deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -305,13 +305,26 @@
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the atomicMax instruction on
 # 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
-  "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH 
"${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
-  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Try to find the highest architecture the host supports
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+  find_package(CUDA QUIET)
+  if (CUDA_FOUND)
+cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  endif()
+else()
+  set(CUDA_ARCH_FLAGS ${CLANG_OPENMP_NVPTX_DEFAULT_ARCH})
+endif()
+
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
   set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
 "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_35")
+else()
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH} CACHE STRING
+"Default architecture for OpenMP offloading to Nvidia GPUs.")
 endif()
 
 set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")


Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,19 @@
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
-  # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
+  # Always build with compute capability sm_35 as a fallback and the highest
+  # architecture the system supports by default
   set(default_capabilities 35)
+  if (NOT DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
+cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+  libomptarget_warning_say("Setting Nvidia GPU architecture support for OpenMP target runtime library to sm_35 by default")
+else()
+  list(APPEND default_capabilities ${CMAKE_MATCH_1})
+endif()
+  endif()
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
 set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
 libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -305,13 +305,26 @@
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the a

[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

In D88929#2315640 , @JonChesterfield 
wrote:

> An alternative approach is to build the deviceRTL for multiple cuda versions 
> and then pick whichever one is the best fit when compiling application code. 
> That has advantages when building the deviceRTL libraries on a different 
> machine to the one that intends to use it.
>
> Cmake isn't my thing, but I see that my trunk build only has 
> libomptarget-nvptx-sm_35.bc when the local card is a sm_50. The downstream 
> amd toolchain builds lots of this library, my install dir has fifteen of them 
> (including sm_50).

You can build multiple deviceRTL today with 
LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=50,61,70. This patch tries to add the 
high arch automatically.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

An alternative approach is to build the deviceRTL for multiple cuda versions 
and then pick whichever one is the best fit when compiling application code. 
That has advantages when building the deviceRTL libraries on a different 
machine to the one that intends to use it.

Cmake isn't my thing, but I see that my trunk build only has 
libomptarget-nvptx-sm_35.bc when the local card is a sm_50. The downstream amd 
toolchain builds lots of this library, my install dir has fifteen of them 
(including sm_50).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

In D88929#2315538 , @jhuber6 wrote:

> In D88929#2315519 , @ye-luo wrote:
>
>> Probably not messing with `enable_language(CUDA)` at the moment, just add 
>> `cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)` to  
>> `openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake?
>
> That only controls loading the library, since this is where we set all the 
> CUDA options I think it's fine to call it here.

Yes. I'm reducing my review to libomptarget only. I could not comment on 
clang/CMakeLists.txt. Regarding libomptarget, keep all the CUDA detection 
inside `LibomptargetGetDependencies.cmake`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D88929#2315519 , @ye-luo wrote:

> Probably not messing with `enable_language(CUDA)` at the moment, just add 
> `cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)` to  
> `openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake?

That only controls loading the library, since this is where we set all the CUDA 
options I think it's fine to call it here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

In D88929#2315513 , @jhuber6 wrote:

> In D88929#2315451 , @ye-luo wrote:
>
>> I just realized that this patch affects clang and libomptarget.
>> I cannot comment on clang. Regarding libomptarget, Could you explain why the 
>> detection is not put together with other cuda stuff in 
>> `openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake`
>
> If we're sticking with using FindCUDA it's definitely redundant here since it 
> was already called by the time we get here. The support for CUDA language 
> would use the same method but have `enable_language(CUDA)` somewhere instead 
> of `find_package(CUDA)`

Probably not messing with `enable_language(CUDA)` at the moment, just add 
`cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)` to  
`openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D88929#2315451 , @ye-luo wrote:

> I just realized that this patch affects clang and libomptarget.
> I cannot comment on clang. Regarding libomptarget, Could you explain why the 
> detection is not put together with other cuda stuff in 
> `openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake`

If we're sticking with using FindCUDA it's definitely redundant here since it 
was already called by the time we get here. The support for CUDA language would 
use the same method but have `enable_language(CUDA)` somewhere instead of 
`find_package(CUDA)`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

I just realized that this patch affects clang and libomptarget.
I cannot comment on clang. Regarding libomptarget, Could you explain why the 
detection is not put together with other cuda stuff in 
`openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D88929#2315402 , @ye-luo wrote:

> The link I posted indicated that independent feature is merged since 3.12. 
> Better to avoid deprecated stuff when introducing new cmake lines even though 
> some existing lines may still rely on deprecated cmake.

This requires adding CUDA as a language, is that alright inside of Clang? 
Nothing is using CUDA, we're just checking the architecture.

In D88929#2315404 , @ye-luo wrote:

> 3.18 introduces CMAKE_CUDA_ARCHITECTURES. Does 3.18 supports detection? If we 
> know a new way works since 3.18, I think putting both with if-else makes 
> sense.
>
> Regarding doing it manually. I think it is more risky than using existing 
> schemes shipped with cmake.

I'm not sure how that would affect the other Libomptarget stuff that uses 
FindCUDA since language enabling is done at the highest level, does that 
conflict?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

3.18 introduces CMAKE_CUDA_ARCHITECTURES. Does 3.18 supports detection? If we 
know a new way works since 3.18, I think putting both with if-else makes sense.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

The link I posted indicated that independent feature is merged since 3.12. 
Better to avoid deprecated stuff when introducing new cmake lines even though 
some existing lines may rely on deprecated cmake.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D88929#2315378 , @ye-luo wrote:

> FindCUDA has been deprecated.
> Please explore the following feature without directly calling FindCUDA.
> https://gitlab.kitware.com/cmake/cmake/-/merge_requests/1856

Finding architectures using the CUDA language support requires CMake 3.18 as 
far as I know. LLVM's minimum CMake requirement is 3.13.4 so this method is 
probably the best I could see. Libomptarget already uses FindCUDA inside 
`openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake` so I 
figured it was okay. Alternatively, we could do the detection manually. All 
this amount to is compiling and running some CUDA code that prints the major 
and minor version. We could do that manually and try to use the original 
language support if you think that's a better option, I'm not a CMake guru or 
anything.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

FindCUDA has been deprecated.
Please explore the following feature without directly calling FindCUDA.
https://gitlab.kitware.com/cmake/cmake/-/merge_requests/1856


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88929/new/

https://reviews.llvm.org/D88929

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D88929: [OpenMP] Change CMake Configuration to Build for Highest CUDA Architecture by Default

2020-10-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added a reviewer: jdoerfert.
jhuber6 added projects: clang, OpenMP.
Herald added subscribers: openmp-commits, cfe-commits, guansong, yaxunl, mgorny.
jhuber6 requested review of this revision.
Herald added a subscriber: sstefan1.

This patch changes the CMake files for Clang and Libomptarget to query the 
system for its supported CUDA architecture. This will simplify the experience 
of building LLVM with OpenMP Offloading support by removing the need to 
manually specify the most optimal architecture for each system. Libomptarget 
will also build support for sm_35 as a fallback. This uses the find_cuda 
methods from CMake to detect the architecture which is deprecated in Cmake 3.18.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88929

Files:
  clang/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt


Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -68,13 +68,26 @@
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
   # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
-  set(default_capabilities 35)
+  set(compute_capabilities 35)
+  if (NOT DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
+find_package(CUDA QUIET)
+if (CUDA_FOUND)
+  cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+  if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+  message(WARNING "Setting default architecture for OpenMP target 
library to sm_35")
+  else()
+list(APPEND compute_capabilities ${CMAKE_MATCH_1})
+  endif()
+endif()
+  endif()
+
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
 set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
 libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is 
deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
   endif()
-  set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE 
STRING
+  set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${compute_capabilities} CACHE 
STRING
 "List of CUDA Compute Capabilities to be used to compile the NVPTX device 
RTL.")
   string(REPLACE "," ";" nvptx_sm_list 
${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES})
 
Index: clang/CMakeLists.txt
===
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -305,13 +305,26 @@
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the atomicMax instruction on
 # 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
-  "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH 
"${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
-  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Try to find the highest architecture the host supports
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+  find_package(CUDA QUIET)
+  if (CUDA_FOUND)
+cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  endif()
+else()
+  set(CUDA_ARCH_FLAGS ${CLANG_OPENMP_NVPTX_DEFAULT_ARCH})
+endif()
+
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+if (NOT DEFINED CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
   set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
 "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_35")
+else()
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH} CACHE STRING
+"Default architecture for OpenMP offloading to Nvidia GPUs.")
 endif()
 
 set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")


Index: openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
===
--- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -68,13 +68,26 @@
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
   # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
-  set(default_capabilities 35)
+  set(compute_capabilities 35)
+  if (NOT DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
+find_package(CUDA QUIET)
+if (CUDA_FOUND)
+  cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH ${CUDA_ARCH_FLAGS})
+  if (NOT DEFINED CUDA_ARCH OR "${CMAK