[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-17 Thread Jonas Hahnfeld via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL315996: [CMake][OpenMP] Customize default offloading arch 
(authored by Hahnfeld).

Changed prior to commit:
  https://reviews.llvm.org/D38883?vs=118961=119310#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D38883

Files:
  cfe/trunk/CMakeLists.txt
  cfe/trunk/include/clang/Config/config.h.cmake
  cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
  cfe/trunk/lib/Driver/ToolChains/Cuda.h


Index: cfe/trunk/include/clang/Config/config.h.cmake
===
--- cfe/trunk/include/clang/Config/config.h.cmake
+++ cfe/trunk/include/clang/Config/config.h.cmake
@@ -20,6 +20,9 @@
 /* Default OpenMP runtime used by -fopenmp. */
 #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
 
+/* Default architecture for OpenMP offloading to Nvidia GPUs. */
+#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
+
 /* Multilib suffix for libdir. */
 #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
 
Index: cfe/trunk/CMakeLists.txt
===
--- cfe/trunk/CMakeLists.txt
+++ cfe/trunk/CMakeLists.txt
@@ -235,6 +235,17 @@
 set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
   "Default OpenMP runtime used by -fopenmp.")
 
+# OpenMP offloading requires at least sm_30 because we use shuffle instructions
+# to generate efficient code for reductions.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+  "Default architecture for OpenMP offloading to Nvidia GPUs.")
+string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH 
"${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
+if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
+  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_30")
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+"Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+endif()
+
 set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
   "Vendor-specific text for showing with version information.")
 
Index: cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
===
--- cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
+++ cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
@@ -542,9 +542,9 @@
   // flags are not duplicated.
   // Also append the compute capability.
   if (DeviceOffloadKind == Action::OFK_OpenMP) {
-for (Arg *A : Args){
+for (Arg *A : Args) {
   bool IsDuplicate = false;
-  for (Arg *DALArg : *DAL){
+  for (Arg *DALArg : *DAL) {
 if (A == DALArg) {
   IsDuplicate = true;
   break;
@@ -555,14 +555,9 @@
 }
 
 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
-if (Arch.empty()) {
-  // Default compute capability for CUDA toolchain is the
-  // lowest compute capability supported by the installed
-  // CUDA version.
-  DAL->AddJoinedArg(nullptr,
-  Opts.getOption(options::OPT_march_EQ),
-  CudaInstallation.getLowestExistingArch());
-}
+if (Arch.empty())
+  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
 
 return DAL;
   }
Index: cfe/trunk/lib/Driver/ToolChains/Cuda.h
===
--- cfe/trunk/lib/Driver/ToolChains/Cuda.h
+++ cfe/trunk/lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@
   std::string getLibDeviceFile(StringRef Gpu) const {
 return LibDeviceMap.lookup(Gpu);
   }
-  /// \brief Get lowest available compute capability
-  /// for which a libdevice library exists.
-  std::string getLowestExistingArch() const {
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())
-return key;
-}
-return "sm_20";
-  }
 };
 
 namespace tools {


Index: cfe/trunk/include/clang/Config/config.h.cmake
===
--- cfe/trunk/include/clang/Config/config.h.cmake
+++ cfe/trunk/include/clang/Config/config.h.cmake
@@ -20,6 +20,9 @@
 /* Default OpenMP runtime used by -fopenmp. */
 #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
 
+/* Default architecture for OpenMP offloading to Nvidia GPUs. */
+#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
+
 /* Multilib suffix for libdir. */
 #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
 
Index: cfe/trunk/CMakeLists.txt
===
--- cfe/trunk/CMakeLists.txt
+++ cfe/trunk/CMakeLists.txt
@@ -235,6 +235,17 @@
 set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
   "Default OpenMP runtime used by -fopenmp.")
 
+# OpenMP offloading requires at least sm_30 because we use shuffle instructions
+# to generate efficient code 

[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-16 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added a comment.

LGTM


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.h:90
-  }
 };
 

gtbercea wrote:
> gtbercea wrote:
> > tra wrote:
> > > gtbercea wrote:
> > > > gtbercea wrote:
> > > > > I would also like to keep the spirit of this code if not in this 
> > > > > exact form at least something that performs the same functionality.
> > > > @tra what's your opinion on this code? Should this stay, stay but 
> > > > modified to be more robust or taken out completely?
> > > There are currently no users for this. In general, I would rather not 
> > > have magically-changing default GPU based on how broken your CUDA 
> > > installation is. IMO it would be better to keep defaults static and fail 
> > > if prerequisites are not met.
> > I would have thought that it is up to the compiler to select, as default, 
> > the lowest viable compute capability. This is what this code aims to do 
> > (whether it actually does that's a separate issue :) ).
> > 
> The reason I added this code in the first place was to overcome the fact that 
> something like a default of sm_30 may work on the K40 but once you go to 
> newer Pascal, Volta GPUs then you need a new minimum compute capability that 
> is supported.
> Should this stay, stay but modified to be more robust or taken out completely?

I'd take it out, at least for now as you've removed the only user of that 
function.

In general, though, compilers tend to use conservative defaults and for CUDA 
that would be the lowest GPU variant supported by compiler. In case of CUDA 
it's determined by the CUDA SDK version. Figuring lowers supported version via 
libdevice mapping we've created is wrong. E.g. with this patch and -nocudalib 
you may end up using CUDA-9 without any libdevice and would return sm_20.

If/when we need to figure out minimum supported version, it should be based 
directly on the value returned by version().


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Jonas Hahnfeld via Phabricator via cfe-commits
Hahnfeld updated this revision to Diff 118961.
Hahnfeld marked an inline comment as done.
Hahnfeld edited the summary of this revision.
Hahnfeld added a comment.

Check that the user didn't specify a value lower than `sm_30` and re-add some 
code as discussed.


https://reviews.llvm.org/D38883

Files:
  CMakeLists.txt
  include/clang/Config/config.h.cmake
  lib/Driver/ToolChains/Cuda.cpp
  lib/Driver/ToolChains/Cuda.h


Index: lib/Driver/ToolChains/Cuda.h
===
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@
   std::string getLibDeviceFile(StringRef Gpu) const {
 return LibDeviceMap.lookup(Gpu);
   }
-  /// \brief Get lowest available compute capability
-  /// for which a libdevice library exists.
-  std::string getLowestExistingArch() const {
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())
-return key;
-}
-return "sm_20";
-  }
 };
 
 namespace tools {
Index: lib/Driver/ToolChains/Cuda.cpp
===
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -551,9 +551,9 @@
   // flags are not duplicated.
   // Also append the compute capability.
   if (DeviceOffloadKind == Action::OFK_OpenMP) {
-for (Arg *A : Args){
+for (Arg *A : Args) {
   bool IsDuplicate = false;
-  for (Arg *DALArg : *DAL){
+  for (Arg *DALArg : *DAL) {
 if (A == DALArg) {
   IsDuplicate = true;
   break;
@@ -564,14 +564,9 @@
 }
 
 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
-if (Arch.empty()) {
-  // Default compute capability for CUDA toolchain is the
-  // lowest compute capability supported by the installed
-  // CUDA version.
-  DAL->AddJoinedArg(nullptr,
-  Opts.getOption(options::OPT_march_EQ),
-  CudaInstallation.getLowestExistingArch());
-}
+if (Arch.empty())
+  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
 
 return DAL;
   }
Index: include/clang/Config/config.h.cmake
===
--- include/clang/Config/config.h.cmake
+++ include/clang/Config/config.h.cmake
@@ -20,6 +20,9 @@
 /* Default OpenMP runtime used by -fopenmp. */
 #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
 
+/* Default architecture for OpenMP offloading to Nvidia GPUs. */
+#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
+
 /* Multilib suffix for libdir. */
 #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
 
Index: CMakeLists.txt
===
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -235,6 +235,17 @@
 set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
   "Default OpenMP runtime used by -fopenmp.")
 
+# OpenMP offloading requires at least sm_30 because we use shuffle instructions
+# to generate efficient code for reductions.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+  "Default architecture for OpenMP offloading to Nvidia GPUs.")
+string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH 
"${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
+if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
+  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_30")
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+"Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+endif()
+
 set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
   "Vendor-specific text for showing with version information.")
 


Index: lib/Driver/ToolChains/Cuda.h
===
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@
   std::string getLibDeviceFile(StringRef Gpu) const {
 return LibDeviceMap.lookup(Gpu);
   }
-  /// \brief Get lowest available compute capability
-  /// for which a libdevice library exists.
-  std::string getLowestExistingArch() const {
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())
-return key;
-}
-return "sm_20";
-  }
 };
 
 namespace tools {
Index: lib/Driver/ToolChains/Cuda.cpp
===
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -551,9 +551,9 @@
   // flags are not duplicated.
   // Also append the compute capability.
   if (DeviceOffloadKind == Action::OFK_OpenMP) {
-for (Arg *A : Args){
+for (Arg *A : Args) {
   bool IsDuplicate = false;
-  for (Arg *DALArg : *DAL){
+  for (Arg *DALArg : *DAL) {
 if (A == DALArg) {
 

[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.h:90
-  }
 };
 

gtbercea wrote:
> tra wrote:
> > gtbercea wrote:
> > > gtbercea wrote:
> > > > I would also like to keep the spirit of this code if not in this exact 
> > > > form at least something that performs the same functionality.
> > > @tra what's your opinion on this code? Should this stay, stay but 
> > > modified to be more robust or taken out completely?
> > There are currently no users for this. In general, I would rather not have 
> > magically-changing default GPU based on how broken your CUDA installation 
> > is. IMO it would be better to keep defaults static and fail if 
> > prerequisites are not met.
> I would have thought that it is up to the compiler to select, as default, the 
> lowest viable compute capability. This is what this code aims to do (whether 
> it actually does that's a separate issue :) ).
> 
The reason I added this code in the first place was to overcome the fact that 
something like a default of sm_30 may work on the K40 but once you go to newer 
Pascal, Volta GPUs then you need a new minimum compute capability that is 
supported.


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.h:90
-  }
 };
 

tra wrote:
> gtbercea wrote:
> > gtbercea wrote:
> > > I would also like to keep the spirit of this code if not in this exact 
> > > form at least something that performs the same functionality.
> > @tra what's your opinion on this code? Should this stay, stay but modified 
> > to be more robust or taken out completely?
> There are currently no users for this. In general, I would rather not have 
> magically-changing default GPU based on how broken your CUDA installation is. 
> IMO it would be better to keep defaults static and fail if prerequisites are 
> not met.
I would have thought that it is up to the compiler to select, as default, the 
lowest viable compute capability. This is what this code aims to do (whether it 
actually does that's a separate issue :) ).



https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.h:90
-  }
 };
 

gtbercea wrote:
> gtbercea wrote:
> > I would also like to keep the spirit of this code if not in this exact form 
> > at least something that performs the same functionality.
> @tra what's your opinion on this code? Should this stay, stay but modified to 
> be more robust or taken out completely?
There are currently no users for this. In general, I would rather not have 
magically-changing default GPU based on how broken your CUDA installation is. 
IMO it would be better to keep defaults static and fail if prerequisites are 
not met.


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.h:90
-  }
 };
 

gtbercea wrote:
> I would also like to keep the spirit of this code if not in this exact form 
> at least something that performs the same functionality.
@tra what's your opinion on this code? Should this stay, stay but modified to 
be more robust or taken out completely?


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

gtbercea wrote:
> tra wrote:
> > tra wrote:
> > > gtbercea wrote:
> > > > gtbercea wrote:
> > > > > Hahnfeld wrote:
> > > > > > tra wrote:
> > > > > > > Hahnfeld wrote:
> > > > > > > > tra wrote:
> > > > > > > > > I'd keep this code. It appears to serve useful purpose as it 
> > > > > > > > > requires CUDA installation to have at least some libdevice 
> > > > > > > > > library in it.  It gives us a change to find a valid 
> > > > > > > > > installation, instead of ailing some time later when we ask 
> > > > > > > > > for a libdevice file and fail because there are none.
> > > > > > > > We had some internal discussions about this after I submitted 
> > > > > > > > the patch here.
> > > > > > > > 
> > > > > > > > The main question is: Do we want to support CUDA installations 
> > > > > > > > without libdevice and are there use cases for that? I'd say 
> > > > > > > > that the user should be able to use a toolchain without 
> > > > > > > > libdevice together with `-nocudalib`.
> > > > > > > Sounds reasonable. How about keeping the code but putting it 
> > > > > > > under `if(!hasArg(nocudalib))`?
> > > > > > > 
> > > > > > Ok, I'll do that in a separate patch and keep the code here for now.
> > > > > The problem with nocudalib is that if for example you write a test, 
> > > > > which looks to verify some device facing feature that requires a 
> > > > > libdevice to be found (so you don't want to use nocudalib), it will 
> > > > > probably work on your machine which has the correct CUDA setup but 
> > > > > fail on another machine which does not (which is where you want to 
> > > > > use nocudalib). You can see the contradiction there.
> > > > Just to be clear I am arguing for keeping this code :)
> > > @gtbercea: I'm not sure I follow your example. If you're talking about 
> > > clang tests, we do have fake CUDA installation setup under 
> > > test/Driver/Inputs which removes dependency on whatever CUDA you may or 
> > > may not have installed on your machine. I also don't see a contradiction 
> > > -- you you do need libdevice, it makes no point picking a broken CUDA 
> > > installation which does not have any libdevice files. If you explicitly 
> > > tell compiler that you don't need libdevice, that would make CUDA w/o 
> > > libdevice acceptable. With --cuda-path you do have a way to tell clang 
> > > which installation you want it to use. What do I miss?
> > > 
> > > 
> > Ah, you were arguing with Hahnfeld@'s -nocudalib example. Then I guess 
> > we're in violent agreement.
> I fully agree with this: "you do need libdevice, it makes no point picking a 
> broken CUDA installation which does not have any libdevice files. If you 
> explicitly tell compiler that you don't need libdevice, that would make CUDA 
> w/o libdevice acceptable."
> 
> I was trying to show an example of a situation where you have your code 
> compiled using nocudalib on one machine and then the same code will error on 
> a machine which requires the nocudalib flag to be passed to make up for the 
> absence of libdevice.
> 
> 
Yes it was a counter argument to that! :) 


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

tra wrote:
> tra wrote:
> > gtbercea wrote:
> > > gtbercea wrote:
> > > > Hahnfeld wrote:
> > > > > tra wrote:
> > > > > > Hahnfeld wrote:
> > > > > > > tra wrote:
> > > > > > > > I'd keep this code. It appears to serve useful purpose as it 
> > > > > > > > requires CUDA installation to have at least some libdevice 
> > > > > > > > library in it.  It gives us a change to find a valid 
> > > > > > > > installation, instead of ailing some time later when we ask for 
> > > > > > > > a libdevice file and fail because there are none.
> > > > > > > We had some internal discussions about this after I submitted the 
> > > > > > > patch here.
> > > > > > > 
> > > > > > > The main question is: Do we want to support CUDA installations 
> > > > > > > without libdevice and are there use cases for that? I'd say that 
> > > > > > > the user should be able to use a toolchain without libdevice 
> > > > > > > together with `-nocudalib`.
> > > > > > Sounds reasonable. How about keeping the code but putting it under 
> > > > > > `if(!hasArg(nocudalib))`?
> > > > > > 
> > > > > Ok, I'll do that in a separate patch and keep the code here for now.
> > > > The problem with nocudalib is that if for example you write a test, 
> > > > which looks to verify some device facing feature that requires a 
> > > > libdevice to be found (so you don't want to use nocudalib), it will 
> > > > probably work on your machine which has the correct CUDA setup but fail 
> > > > on another machine which does not (which is where you want to use 
> > > > nocudalib). You can see the contradiction there.
> > > Just to be clear I am arguing for keeping this code :)
> > @gtbercea: I'm not sure I follow your example. If you're talking about 
> > clang tests, we do have fake CUDA installation setup under 
> > test/Driver/Inputs which removes dependency on whatever CUDA you may or may 
> > not have installed on your machine. I also don't see a contradiction -- you 
> > you do need libdevice, it makes no point picking a broken CUDA installation 
> > which does not have any libdevice files. If you explicitly tell compiler 
> > that you don't need libdevice, that would make CUDA w/o libdevice 
> > acceptable. With --cuda-path you do have a way to tell clang which 
> > installation you want it to use. What do I miss?
> > 
> > 
> Ah, you were arguing with Hahnfeld@'s -nocudalib example. Then I guess we're 
> in violent agreement.
I fully agree with this: "you do need libdevice, it makes no point picking a 
broken CUDA installation which does not have any libdevice files. If you 
explicitly tell compiler that you don't need libdevice, that would make CUDA 
w/o libdevice acceptable."

I was trying to show an example of a situation where you have your code 
compiled using nocudalib on one machine and then the same code will error on a 
machine which requires the nocudalib flag to be passed to make up for the 
absence of libdevice.




https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

tra wrote:
> gtbercea wrote:
> > gtbercea wrote:
> > > Hahnfeld wrote:
> > > > tra wrote:
> > > > > Hahnfeld wrote:
> > > > > > tra wrote:
> > > > > > > I'd keep this code. It appears to serve useful purpose as it 
> > > > > > > requires CUDA installation to have at least some libdevice 
> > > > > > > library in it.  It gives us a change to find a valid 
> > > > > > > installation, instead of ailing some time later when we ask for a 
> > > > > > > libdevice file and fail because there are none.
> > > > > > We had some internal discussions about this after I submitted the 
> > > > > > patch here.
> > > > > > 
> > > > > > The main question is: Do we want to support CUDA installations 
> > > > > > without libdevice and are there use cases for that? I'd say that 
> > > > > > the user should be able to use a toolchain without libdevice 
> > > > > > together with `-nocudalib`.
> > > > > Sounds reasonable. How about keeping the code but putting it under 
> > > > > `if(!hasArg(nocudalib))`?
> > > > > 
> > > > Ok, I'll do that in a separate patch and keep the code here for now.
> > > The problem with nocudalib is that if for example you write a test, which 
> > > looks to verify some device facing feature that requires a libdevice to 
> > > be found (so you don't want to use nocudalib), it will probably work on 
> > > your machine which has the correct CUDA setup but fail on another machine 
> > > which does not (which is where you want to use nocudalib). You can see 
> > > the contradiction there.
> > Just to be clear I am arguing for keeping this code :)
> @gtbercea: I'm not sure I follow your example. If you're talking about clang 
> tests, we do have fake CUDA installation setup under test/Driver/Inputs which 
> removes dependency on whatever CUDA you may or may not have installed on your 
> machine. I also don't see a contradiction -- you you do need libdevice, it 
> makes no point picking a broken CUDA installation which does not have any 
> libdevice files. If you explicitly tell compiler that you don't need 
> libdevice, that would make CUDA w/o libdevice acceptable. With --cuda-path 
> you do have a way to tell clang which installation you want it to use. What 
> do I miss?
> 
> 
Ah, you were arguing with Hahnfeld@'s -nocudalib example. Then I guess we're in 
violent agreement.


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

gtbercea wrote:
> gtbercea wrote:
> > Hahnfeld wrote:
> > > tra wrote:
> > > > Hahnfeld wrote:
> > > > > tra wrote:
> > > > > > I'd keep this code. It appears to serve useful purpose as it 
> > > > > > requires CUDA installation to have at least some libdevice library 
> > > > > > in it.  It gives us a change to find a valid installation, instead 
> > > > > > of ailing some time later when we ask for a libdevice file and fail 
> > > > > > because there are none.
> > > > > We had some internal discussions about this after I submitted the 
> > > > > patch here.
> > > > > 
> > > > > The main question is: Do we want to support CUDA installations 
> > > > > without libdevice and are there use cases for that? I'd say that the 
> > > > > user should be able to use a toolchain without libdevice together 
> > > > > with `-nocudalib`.
> > > > Sounds reasonable. How about keeping the code but putting it under 
> > > > `if(!hasArg(nocudalib))`?
> > > > 
> > > Ok, I'll do that in a separate patch and keep the code here for now.
> > The problem with nocudalib is that if for example you write a test, which 
> > looks to verify some device facing feature that requires a libdevice to be 
> > found (so you don't want to use nocudalib), it will probably work on your 
> > machine which has the correct CUDA setup but fail on another machine which 
> > does not (which is where you want to use nocudalib). You can see the 
> > contradiction there.
> Just to be clear I am arguing for keeping this code :)
@gtbercea: I'm not sure I follow your example. If you're talking about clang 
tests, we do have fake CUDA installation setup under test/Driver/Inputs which 
removes dependency on whatever CUDA you may or may not have installed on your 
machine. I also don't see a contradiction -- you you do need libdevice, it 
makes no point picking a broken CUDA installation which does not have any 
libdevice files. If you explicitly tell compiler that you don't need libdevice, 
that would make CUDA w/o libdevice acceptable. With --cuda-path you do have a 
way to tell clang which installation you want it to use. What do I miss?




https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.h:90
-  }
 };
 

I would also like to keep the spirit of this code if not in this exact form at 
least something that performs the same functionality.


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

gtbercea wrote:
> Hahnfeld wrote:
> > tra wrote:
> > > Hahnfeld wrote:
> > > > tra wrote:
> > > > > I'd keep this code. It appears to serve useful purpose as it requires 
> > > > > CUDA installation to have at least some libdevice library in it.  It 
> > > > > gives us a change to find a valid installation, instead of ailing 
> > > > > some time later when we ask for a libdevice file and fail because 
> > > > > there are none.
> > > > We had some internal discussions about this after I submitted the patch 
> > > > here.
> > > > 
> > > > The main question is: Do we want to support CUDA installations without 
> > > > libdevice and are there use cases for that? I'd say that the user 
> > > > should be able to use a toolchain without libdevice together with 
> > > > `-nocudalib`.
> > > Sounds reasonable. How about keeping the code but putting it under 
> > > `if(!hasArg(nocudalib))`?
> > > 
> > Ok, I'll do that in a separate patch and keep the code here for now.
> The problem with nocudalib is that if for example you write a test, which 
> looks to verify some device facing feature that requires a libdevice to be 
> found (so you don't want to use nocudalib), it will probably work on your 
> machine which has the correct CUDA setup but fail on another machine which 
> does not (which is where you want to use nocudalib). You can see the 
> contradiction there.
Just to be clear I am arguing for keeping this code :)


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Gheorghe-Teodor Bercea via Phabricator via cfe-commits
gtbercea added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

Hahnfeld wrote:
> tra wrote:
> > Hahnfeld wrote:
> > > tra wrote:
> > > > I'd keep this code. It appears to serve useful purpose as it requires 
> > > > CUDA installation to have at least some libdevice library in it.  It 
> > > > gives us a change to find a valid installation, instead of ailing some 
> > > > time later when we ask for a libdevice file and fail because there are 
> > > > none.
> > > We had some internal discussions about this after I submitted the patch 
> > > here.
> > > 
> > > The main question is: Do we want to support CUDA installations without 
> > > libdevice and are there use cases for that? I'd say that the user should 
> > > be able to use a toolchain without libdevice together with `-nocudalib`.
> > Sounds reasonable. How about keeping the code but putting it under 
> > `if(!hasArg(nocudalib))`?
> > 
> Ok, I'll do that in a separate patch and keep the code here for now.
The problem with nocudalib is that if for example you write a test, which looks 
to verify some device facing feature that requires a libdevice to be found (so 
you don't want to use nocudalib), it will probably work on your machine which 
has the correct CUDA setup but fail on another machine which does not (which is 
where you want to use nocudalib). You can see the contradiction there.


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Jonas Hahnfeld via Phabricator via cfe-commits
Hahnfeld marked 4 inline comments as done.
Hahnfeld added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

tra wrote:
> Hahnfeld wrote:
> > tra wrote:
> > > I'd keep this code. It appears to serve useful purpose as it requires 
> > > CUDA installation to have at least some libdevice library in it.  It 
> > > gives us a change to find a valid installation, instead of ailing some 
> > > time later when we ask for a libdevice file and fail because there are 
> > > none.
> > We had some internal discussions about this after I submitted the patch 
> > here.
> > 
> > The main question is: Do we want to support CUDA installations without 
> > libdevice and are there use cases for that? I'd say that the user should be 
> > able to use a toolchain without libdevice together with `-nocudalib`.
> Sounds reasonable. How about keeping the code but putting it under 
> `if(!hasArg(nocudalib))`?
> 
Ok, I'll do that in a separate patch and keep the code here for now.


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

Hahnfeld wrote:
> tra wrote:
> > I'd keep this code. It appears to serve useful purpose as it requires CUDA 
> > installation to have at least some libdevice library in it.  It gives us a 
> > change to find a valid installation, instead of ailing some time later when 
> > we ask for a libdevice file and fail because there are none.
> We had some internal discussions about this after I submitted the patch here.
> 
> The main question is: Do we want to support CUDA installations without 
> libdevice and are there use cases for that? I'd say that the user should be 
> able to use a toolchain without libdevice together with `-nocudalib`.
Sounds reasonable. How about keeping the code but putting it under 
`if(!hasArg(nocudalib))`?




Comment at: lib/Driver/ToolChains/Cuda.cpp:556
+  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
 }

Hahnfeld wrote:
> tra wrote:
> > This sets default GPU arch for *everyone* based on OPENMP requirements. 
> > Perhaps this should be predicated on this being openmp compilation.
> > 
> > IMO to avoid unnecessary surprises, the default for CUDA compilation should 
> > follow defaults of nvcc. sm_30 becomes default only in CUDA-9.
> > 
> This branch is only executed for OpenMP, see above
OK. I've missed that 'if'.


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Jonas Hahnfeld via Phabricator via cfe-commits
Hahnfeld marked an inline comment as done.
Hahnfeld added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

tra wrote:
> I'd keep this code. It appears to serve useful purpose as it requires CUDA 
> installation to have at least some libdevice library in it.  It gives us a 
> change to find a valid installation, instead of ailing some time later when 
> we ask for a libdevice file and fail because there are none.
We had some internal discussions about this after I submitted the patch here.

The main question is: Do we want to support CUDA installations without 
libdevice and are there use cases for that? I'd say that the user should be 
able to use a toolchain without libdevice together with `-nocudalib`.



Comment at: lib/Driver/ToolChains/Cuda.cpp:540
   // Also append the compute capability.
   if (DeviceOffloadKind == Action::OFK_OpenMP) {
 for (Arg *A : Args){

This check guards the whole block.



Comment at: lib/Driver/ToolChains/Cuda.cpp:556
+  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
 }

tra wrote:
> This sets default GPU arch for *everyone* based on OPENMP requirements. 
> Perhaps this should be predicated on this being openmp compilation.
> 
> IMO to avoid unnecessary surprises, the default for CUDA compilation should 
> follow defaults of nvcc. sm_30 becomes default only in CUDA-9.
> 
This branch is only executed for OpenMP, see above


https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: lib/Driver/ToolChains/Cuda.cpp:170-182
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())

I'd keep this code. It appears to serve useful purpose as it requires CUDA 
installation to have at least some libdevice library in it.  It gives us a 
change to find a valid installation, instead of ailing some time later when we 
ask for a libdevice file and fail because there are none.



Comment at: lib/Driver/ToolChains/Cuda.cpp:556
+  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
 }

This sets default GPU arch for *everyone* based on OPENMP requirements. Perhaps 
this should be predicated on this being openmp compilation.

IMO to avoid unnecessary surprises, the default for CUDA compilation should 
follow defaults of nvcc. sm_30 becomes default only in CUDA-9.



https://reviews.llvm.org/D38883



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38883: [CMake][OpenMP] Customize default offloading arch

2017-10-13 Thread Jonas Hahnfeld via Phabricator via cfe-commits
Hahnfeld created this revision.
Herald added a subscriber: mgorny.

For the shuffle instructions in reductions we need at least sm_30
but the user may want to customize the default architecture.
Also remove some code that went in while troubleshooting broken
tests on external build bots.


https://reviews.llvm.org/D38883

Files:
  CMakeLists.txt
  include/clang/Config/config.h.cmake
  lib/Driver/ToolChains/Cuda.cpp
  lib/Driver/ToolChains/Cuda.h


Index: lib/Driver/ToolChains/Cuda.h
===
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@
   std::string getLibDeviceFile(StringRef Gpu) const {
 return LibDeviceMap.lookup(Gpu);
   }
-  /// \brief Get lowest available compute capability
-  /// for which a libdevice library exists.
-  std::string getLowestExistingArch() const {
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())
-return key;
-}
-return "sm_20";
-  }
 };
 
 namespace tools {
Index: lib/Driver/ToolChains/Cuda.cpp
===
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -167,19 +167,6 @@
   }
 }
 
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())
-allEmpty = false;
-}
-
-if (allEmpty)
-  continue;
-
 IsValid = true;
 break;
   }
@@ -565,12 +552,8 @@
 
 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
 if (Arch.empty()) {
-  // Default compute capability for CUDA toolchain is the
-  // lowest compute capability supported by the installed
-  // CUDA version.
-  DAL->AddJoinedArg(nullptr,
-  Opts.getOption(options::OPT_march_EQ),
-  CudaInstallation.getLowestExistingArch());
+  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
 }
 
 return DAL;
Index: include/clang/Config/config.h.cmake
===
--- include/clang/Config/config.h.cmake
+++ include/clang/Config/config.h.cmake
@@ -20,6 +20,9 @@
 /* Default OpenMP runtime used by -fopenmp. */
 #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
 
+/* Default architecture for OpenMP offloading to Nvidia GPUs. */
+#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
+
 /* Multilib suffix for libdir. */
 #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
 
Index: CMakeLists.txt
===
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -235,6 +235,16 @@
 set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
   "Default OpenMP runtime used by -fopenmp.")
 
+# OpenMP offloading requires at least sm_30 because we use shuffle instructions
+# to generate efficient code for reductions.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+  "Default architecture for OpenMP offloading to Nvidia GPUs.")
+if (NOT("${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}" MATCHES "^sm_[0-9]+$"))
+  message(WARNING "Resetting default architecture for OpenMP offloading to 
Nvidia GPUs to sm_30")
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+"Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+endif()
+
 set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
   "Vendor-specific text for showing with version information.")
 


Index: lib/Driver/ToolChains/Cuda.h
===
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@
   std::string getLibDeviceFile(StringRef Gpu) const {
 return LibDeviceMap.lookup(Gpu);
   }
-  /// \brief Get lowest available compute capability
-  /// for which a libdevice library exists.
-  std::string getLowestExistingArch() const {
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())
-return key;
-}
-return "sm_20";
-  }
 };
 
 namespace tools {
Index: lib/Driver/ToolChains/Cuda.cpp
===
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -167,19 +167,6 @@
   }
 }
 
-// This code prevents IsValid from being set when
-// no libdevice has been found.
-bool allEmpty = true;
-std::string LibDeviceFile;
-for (auto key : LibDeviceMap.keys()) {
-  LibDeviceFile = LibDeviceMap.lookup(key);
-  if (!LibDeviceFile.empty())
-allEmpty = false;
-}
-
-