[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-06-01 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam updated this revision to Diff 433401.
saiislam added a comment.

Added the multi-entry logic in libomptarget. Yet to move the image 
compatibility testing to plugin.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/include/rtl.h
  openmp/libomptarget/src/exports
  openmp/libomptarget/src/interface.cpp
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+//#include "llvm/OffloadArch/OffloadArch.h"
 
 #include 
 #include 
@@ -20,6 +21,8 @@
 #include 
 #include 
 #include 
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include 
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,127 @@
 initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistant storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+ char *offload_arch_output_buffer,
+ size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env varible is present then use its value instead
+  // of querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+if (OffloadArchEnvVar) {
+  active_env->capabilities = OffloadArchEnvVar;
+  return;
+}
+  }
+  // Qget runtime capabilities of this system with libLLVMOffloadArch.a
+  // if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  // offload_arch_output_buffer_size))
+  //   return;
+  // active_env->capabilities = offload_arch_output_buffer;
+  // return;
+}
+
+std::vector _splitstrings(char *input, const char *sep) {
+  std::vector split_strings;
+  std::string s(input);
+  std::string delimiter(sep);
+  size_t pos = 0;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+if (pos != 0)
+  split_strings.push_back(s.substr(0, pos));
+s.erase(0, pos + delimiter.length());
+  }
+  if (s.length() > 1)
+split_strings.push_back(s.substr(0, s.length()));
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *image_info,
+  __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!(image_info && image_info->image_info_version == 1))
+return true;
+
+  if (!active_env->capabilities)
+return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the image_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // actvie_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector reqs = _splitstrings(image_info->offload_arch, ":");
+  std::vector caps = _splitstrings(active_env->capabilities, ":");
+
+  bool is_compatible = true;
+  for (auto req : reqs) {
+bool missing_capability = true;
+for (auto capability : caps)
+  if (capability == req)
+missing_capability = false;
+if (missing_capability) {
+  DP("Image requires %s but runtime capability %s is missing.\n",
+ image_info->offload_arch, req.c_str());
+  is_compatible = false;
+}
+  }
+  return is_compatible;
+}
+
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
+
+  __tgt_device_image *newDeviceImages =
+  new __tgt_device_image[desc->NumDeviceImages];
+
+  for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
+newDeviceImages[i].EntriesBegin = desc->DeviceImages[i].EntriesBegin;
+newDeviceImages[i].EntriesEnd = desc->DeviceImages[i].EntriesEnd;
+newDeviceImages[i].ImageStart = desc->DeviceImages[i].ImageStart;
+newDeviceImages[i].ImageEnd = desc->DeviceImages[i].ImageEnd;
+newDeviceImages[i].ImageInfo = nullptr;
+// TODO : delete(desc->DeviceImages[i]);
+  }
+
+  desc->DeviceImages = static_cast<__tgt_device_image *>(newDeviceImages);
+
+  this->RegisterLibV2(desc);
+}
+
+#define MAX_CAPS_STR_SIZE 1024
+void RTLsTy::RegisterLibV2(__tgt_bin_desc *desc) {
+
+  // Get the c

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-30 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

Also I probably should've discussed this earlier, but another potential 
solution is to use the binary format 

 that we use to embed the object files for the images as well. This is more 
similar to how CUDA does it. Making it backwards compatible would probably 
require just assuming it's an image with no information if it doesn't include 
the magic bytes. This might be better if we want a unified tool to give 
information on embedded device formats.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-26 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:229-232
+// store value of these variables (i.e. offload archs) into a custom
+// section which will be used by "offload-arch -f". It won't be
+// removed during binary stripping.
+GV->setSection(".offload_arch_list");

saiislam wrote:
> jhuber6 wrote:
> > Why does this need a custom section? We should just use it like this, not 
> > sure why we need these to  be anything but some internal struct.
> > ```
> > for (auto *Image : BinDesc->Images) {
> >   if (Image->Info)
> >// Use Info
> > }
> > ```
> Custom section is required so that it survives binary stripping done by 
> various OSes. `Offload-arch` tool will look into this section of the binary 
> to print list of supported architectures.
I see, I was also planning on making a tool that will extract these similar to 
`cuobjdump`, we could merge it into that as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-26 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam added a comment.

Thanks for the detailed review. I will update rest of the patch soon.




Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:613-614
   CmdArgs.push_back("-shared");
+  std::string ArchArg = std::string("-plugin-opt=mcpu=").append(Arch.str());
+  CmdArgs.push_back(ArchArg);
   CmdArgs.push_back("-o");

jhuber6 wrote:
> Does this just pass the architecture to the AMD link? Is this related? If not 
> move it to a separate patch and I'll review it.
Yes, it passes the architecture to AMD link.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:229-232
+// store value of these variables (i.e. offload archs) into a custom
+// section which will be used by "offload-arch -f". It won't be
+// removed during binary stripping.
+GV->setSection(".offload_arch_list");

jhuber6 wrote:
> Why does this need a custom section? We should just use it like this, not 
> sure why we need these to  be anything but some internal struct.
> ```
> for (auto *Image : BinDesc->Images) {
>   if (Image->Info)
>// Use Info
> }
> ```
Custom section is required so that it survives binary stripping done by various 
OSes. `Offload-arch` tool will look into this section of the binary to print 
list of supported architectures.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-26 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam updated this revision to Diff 432352.
saiislam marked 8 inline comments as done.
saiislam added a comment.

Addressed some simple review changes. Will update remaining in the next 
iteration.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+//#include "llvm/OffloadArch/OffloadArch.h"
 
 #include 
 #include 
@@ -20,6 +21,8 @@
 #include 
 #include 
 #include 
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include 
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,108 @@
 initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistant storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+ char *offload_arch_output_buffer,
+ size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env varible is present then use its value instead
+  // of querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+if (OffloadArchEnvVar) {
+  active_env->capabilities = OffloadArchEnvVar;
+  return;
+}
+  }
+  // Qget runtime capabilities of this system with libLLVMOffloadArch.a
+  // if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  // offload_arch_output_buffer_size))
+  //   return;
+  // active_env->capabilities = offload_arch_output_buffer;
+  // return;
+}
+
+std::vector _splitstrings(char *input, const char *sep) {
+  std::vector split_strings;
+  std::string s(input);
+  std::string delimiter(sep);
+  size_t pos = 0;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+if (pos != 0)
+  split_strings.push_back(s.substr(0, pos));
+s.erase(0, pos + delimiter.length());
+  }
+  if (s.length() > 1)
+split_strings.push_back(s.substr(0, s.length()));
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *image_info,
+  __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!image_info)
+return true;
+
+  if (!active_env->capabilities)
+return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the image_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // actvie_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector reqs = _splitstrings(image_info->offload_arch, ":");
+  std::vector caps = _splitstrings(active_env->capabilities, ":");
+
+  bool is_compatible = true;
+  for (auto req : reqs) {
+bool missing_capability = true;
+for (auto capability : caps)
+  if (capability == req)
+missing_capability = false;
+if (missing_capability) {
+  DP("Image requires %s but runtime capability %s is missing.\n",
+ image_info->offload_arch, req.c_str());
+  is_compatible = false;
+}
+  }
+  return is_compatible;
+}
+
+#define MAX_CAPS_STR_SIZE 1024
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
+
+  // Get the current active offload environment
+  __tgt_active_offload_env offload_env = {nullptr};
+  // Need a buffer to hold results of offload-arch -c command
+  size_t offload_arch_output_buffer_size = MAX_CAPS_STR_SIZE;
+  std::vector offload_arch_output_buffer;
+  offload_arch_output_buffer.resize(offload_arch_output_buffer_size);
+  __tgt_get_active_offload_env(&offload_env, offload_arch_output_buffer.data(),
+   offload_arch_output_buffer_size);
+
+  RTLInfoTy *FoundRTL = NULL;
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
 // Obtain the image.
 __tgt_device_image *img = &desc->DeviceImages[i];
 
-RTLInfoTy *FoundRTL = nullptr;
-
+// Get corresponding image info offload_arch and check with runtime
+if (!_ImageIsCompatibleWithEnv(i

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-25 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

Where is the code to change the target registration? We need a new 
initialization runtime call to use and change the old one to reallocate the 
`__tgt_device_image` array. Did you work around that some other way?




Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:613-614
   CmdArgs.push_back("-shared");
+  std::string ArchArg = std::string("-plugin-opt=mcpu=").append(Arch.str());
+  CmdArgs.push_back(ArchArg);
   CmdArgs.push_back("-o");

Does this just pass the architecture to the AMD link? Is this related? If not 
move it to a separate patch and I'll review it.



Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:1069
 bool WholeProgram = false;
+std::string TheArch = File.Arch;
 

Is this necessary?



Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:1153
   SmallVector, 4> ImagesToWrap;
+  SmallVector, 4> OffloadArchs;
+  std::string Arch;

This should be a StringRef, the data is stable. When you create the struct just 
use this and it will add the null terminator for you.
```
Constant *ConstantStr = ConstantDataArray::getString(M.getContext(), Str);
```



Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:1165
+
+if (Arch.empty()) {
+  Arch = std::string(File.Arch);

An empty `StringRef` should already create a null string when you create a 
ConstantDataArray, should be fine to just push it back.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:62-63
+//   int32_t version;
+//   int32_t image_number;
+//   int32_t number_images;
+//   char* offload_arch;

The struct should be contained within the image itself so we can query it 
during registration, why do we need to know the number?



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:64-65
+//   int32_t number_images;
+//   char* offload_arch;
+//   char* target_compile_opts;
+// };





Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:191-194
+  auto *NullPtr = llvm::ConstantPointerNull::get(Type::getInt8PtrTy(C));
+  unsigned int ImgCount = 0;
+  std::string OffloadArchBase = "__offload_arch";
+  std::string OffloadImageBase = "offload_image_info";

You should be able to pass the name to the constructor, if it's internal LLVM 
will give you a new name.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:214
 
-ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB,
-  ImageE, EntriesB, EntriesE));
+auto OArch = OffloadArchs[ImgCount];
+Constant *OArchV = ConstantDataArray::get(C, OArch);

I would just assert that the metadata vector's size matches the number of 
images, we're always going to generate these now so it makes sense to generate 
them in pairs. Then just use `llvm::zip` or just an iterator to go through both 
of these at the same time.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:215-227
+Constant *OArchV = ConstantDataArray::get(C, OArch);
+std::string OffloadArchGV(OffloadArchBase),
+OffloadImageGV(OffloadImageBase);
+if (ImgCount) {
+  auto Suffix = std::to_string(ImgCount);
+  OffloadArchGV.append(".").append(Suffix);
+  OffloadImageGV.append(".").append(Suffix);

Why do we need all these strings? Should just be generating a constant 
initializer.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:229-232
+// store value of these variables (i.e. offload archs) into a custom
+// section which will be used by "offload-arch -f". It won't be
+// removed during binary stripping.
+GV->setSection(".offload_arch_list");

Why does this need a custom section? We should just use it like this, not sure 
why we need these to  be anything but some internal struct.
```
for (auto *Image : BinDesc->Images) {
  if (Image->Info)
   // Use Info
}
```



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:240-241
+getImageInfoTy(M), ConstantInt::get(Type::getInt32Ty(C), 1),
+ConstantInt::get(Type::getInt32Ty(C), ImgCount++),
+ConstantInt::get(Type::getInt32Ty(C), (uint32_t)OffloadArchs.size()),
+RequirementVPtr,

Why do we need this when we should already know which image we're looking at?



Comment at: openmp/libomptarget/include/omptarget.h:122
 
+/// __tgt_image_info:
+///

This comment is extremely long. It should be sufficient to just say what it 
does, 3 lines max. Specify that it may be null in the device image struct.



Comment at: openmp/libomptarget/include/omptarget.h:145-1

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-25 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam updated this revision to Diff 431975.
saiislam added a comment.

Changed the embedding scheme to add ImageInfo field in __tgt_device_image.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+//#include "llvm/OffloadArch/OffloadArch.h"
 
 #include 
 #include 
@@ -20,6 +21,8 @@
 #include 
 #include 
 #include 
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include 
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,108 @@
 initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistant storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+ char *offload_arch_output_buffer,
+ size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env varible is present then use its value instead
+  // of querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+if (OffloadArchEnvVar) {
+  active_env->capabilities = OffloadArchEnvVar;
+  return;
+}
+  }
+  // Qget runtime capabilities of this system with libLLVMOffloadArch.a
+  // if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  // offload_arch_output_buffer_size))
+  //   return;
+  // active_env->capabilities = offload_arch_output_buffer;
+  // return;
+}
+
+std::vector _splitstrings(char *input, const char *sep) {
+  std::vector split_strings;
+  std::string s(input);
+  std::string delimiter(sep);
+  size_t pos = 0;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+if (pos != 0)
+  split_strings.push_back(s.substr(0, pos));
+s.erase(0, pos + delimiter.length());
+  }
+  if (s.length() > 1)
+split_strings.push_back(s.substr(0, s.length()));
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *image_info,
+  __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!image_info)
+return true;
+
+  if (!active_env->capabilities)
+return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the image_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // actvie_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector reqs = _splitstrings(image_info->offload_arch, ":");
+  std::vector caps = _splitstrings(active_env->capabilities, ":");
+
+  bool is_compatible = true;
+  for (auto req : reqs) {
+bool missing_capability = true;
+for (auto capability : caps)
+  if (capability == req)
+missing_capability = false;
+if (missing_capability) {
+  DP("Image requires %s but runtime capability %s is missing.\n",
+ image_info->offload_arch, req.c_str());
+  is_compatible = false;
+}
+  }
+  return is_compatible;
+}
+
+#define MAX_CAPS_STR_SIZE 1024
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
+
+  // Get the current active offload environment
+  __tgt_active_offload_env offload_env = {nullptr};
+  // Need a buffer to hold results of offload-arch -c command
+  size_t offload_arch_output_buffer_size = MAX_CAPS_STR_SIZE;
+  std::vector offload_arch_output_buffer;
+  offload_arch_output_buffer.resize(offload_arch_output_buffer_size);
+  __tgt_get_active_offload_env(&offload_env, offload_arch_output_buffer.data(),
+   offload_arch_output_buffer_size);
+
+  RTLInfoTy *FoundRTL = NULL;
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
 // Obtain the image.
 __tgt_device_image *img = &desc->DeviceImages[i];
 
-RTLInfoTy *FoundRTL = nullptr;
-
+// Get corresponding image info offload_arch and check with runtime
+if (!_ImageIsCompatibleWithEnv(img->ImageInfo, &offload_env))
+  continue;
+

[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-06 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam added a comment.

In D124525#3491170 , @jhuber6 wrote:

> I'm suggesting instead we define a new `__tgt_device_image` and a new 
> `__tgt_register_lib` function to support this. This new `__tgt_device_image` 
> will simply contain a pointer to an optional information struct.
>
>   struct __tgt_device_image_v2 {
> void* ImageStart;
> void* ImageEnd;
> __tgt_offload_entry*  EntriesBegin;
> __tgt_offload_entry*  EntriesEnd;
> __tgt_image_into*  ImageInfo;
>   };
>
> This new struct breaks the ABI with the old `__tgt_device_image` because 
> these are put into an array and we change the size, but we should be able to 
> provide backwards compatibility by copying from the old format to the new 
> format and creating a new array. We can detect the new vs. old ABI by 
> expecting that existing applications will call the `__tgt_register_image` 
> function. We will create a new `__tgt_register_image_v2` function for example 
> that all new programs will call. In `libomptarget` we then change 
> `__tgt_register_image` to do the necessary translation.
>
>   struct __tgt_bin_desc { 
>   
>
> int32_t NumDeviceImages;   // Number of device types supported
>   
>  
> __tgt_device_image *DeviceImages;  // Array of device images (1 per dev. 
> type)
> __tgt_offload_entry *HostEntriesBegin; // Begin of table with all host 
> entries   
> __tgt_offload_entry *HostEntriesEnd;   // End of table (non inclusive)
>   
>  
>   };
>   
>   EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
> __tgt_device_image_v2 *new_image = alloc_new_version(desc);
> desc->DeviceImages = new_Image;
> __tgt_register_lib_v2(desc);
>   }
>   
>   EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
> __tgt_unregister_lib_v2(desc);
> dealloc(desc->DeviceImages);
>   }
>
> Now the rest of `libomptarget` solely uses the new format, and we check if 
> information is available by seeing that the `ImageInfo` field is non-null.

Thanks for the input. I am going to try it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-05-04 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

I'm suggesting instead we define a new `__tgt_device_image` and a new 
`__tgt_register_lib` function to support this. This new `__tgt_device_image` 
will simply contain a pointer to an optional information struct.

  struct __tgt_device_image_v2 {
void*   ImageStart;
void*   ImageEnd;
__tgt_offload_entry*EntriesBegin;
__tgt_offload_entry*EntriesEnd;
__tgt_image_into*  ImageInfo;
  };

This new struct breaks the ABI with the old `__tgt_device_image` because these 
are put into an array and we change the size, but we should be able to provide 
backwards compatibility by copying from the old format to the new format and 
creating a new array. We can detect the new vs. old ABI by expecting that 
existing applications will call the `__tgt_register_image` function. We will 
create a new `__tgt_register_image_v2` function for example that all new 
programs will call. In `libomptarget` we then change `__tgt_register_image` to 
do the necessary translation.

  struct __tgt_bin_desc {   

   
int32_t NumDeviceImages;   // Number of device types supported  

 
__tgt_device_image *DeviceImages;  // Array of device images (1 per dev. 
type)
__tgt_offload_entry *HostEntriesBegin; // Begin of table with all host 
entries   
__tgt_offload_entry *HostEntriesEnd;   // End of table (non inclusive)  

 
  };
  
  EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
__tgt_device_image_v2 *new_image = alloc_new_version(desc);
desc->DeviceImages = new_Image;
__tgt_register_lib_v2(desc);
  }
  
  EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { // Probably called 
by __tgt_unregister_lib_v2
dealloc(desc->DeviceImages);
  }

Now the rest of `libomptarget` solely uses the new format, and we check if 
information is available by seeing that the `ImageInfo` field is non-null.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:98-104
+  // struct __tgt_image_info {
+  //   int32_t version;
+  //   int32_t image_number;
+  //   int32_t number_images;
+  //   char* offload_arch;
+  //   char* target_compile_opts;
+  // };

yaxunl wrote:
> saiislam wrote:
> > yaxunl wrote:
> > > I am wondering whether we should add a few more fields to make it more 
> > > generic for all offloading languages and platforms:
> > > 
> > > 
> > > ```
> > > char* target_triple;
> > > char* offloading_kind; // openmp, hip, etc
> > > char* file_type; // elf, spirv, bitcode, etc
> > > ```
> > Good idea. Though I am not sure whether file_type info is being propagated 
> > in by the linker-wrapper or not. I will check.
> the file_type is for the file embedded by OffloadWrapper, therefore 
> OffloadWrapper knows it.
> 
> Currently it is fatbin or cubin for CUDA and code object (elf) for HIP.
> 
> In the future, it may be spirv or bitcode to allow JIT compilation of spirv 
> or bitcode in runtime so that one binary for all GPU's. OffloadWrapper will 
> decide what is the final file type to embed based on type of embedded 
> binaries in input files, target triple, and compile arguments.
> the file_type is for the file embedded by OffloadWrapper, therefore 
> OffloadWrapper knows it.
> 
> Currently it is fatbin or cubin for CUDA and code object (elf) for HIP.
> 
> In the future, it may be spirv or bitcode to allow JIT compilation of spirv 
> or bitcode in runtime so that one binary for all GPU's. OffloadWrapper will 
> decide what is the final file type to embed based on type of embedded 
> binaries in input files, target triple, and compile arguments.

Sorry I mean ClangLinkerWrapper. ClangLinkerWrapper should somehow pass the 
file type info to OffloadWrapper.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:98-104
+  // struct __tgt_image_info {
+  //   int32_t version;
+  //   int32_t image_number;
+  //   int32_t number_images;
+  //   char* offload_arch;
+  //   char* target_compile_opts;
+  // };

saiislam wrote:
> yaxunl wrote:
> > I am wondering whether we should add a few more fields to make it more 
> > generic for all offloading languages and platforms:
> > 
> > 
> > ```
> > char* target_triple;
> > char* offloading_kind; // openmp, hip, etc
> > char* file_type; // elf, spirv, bitcode, etc
> > ```
> Good idea. Though I am not sure whether file_type info is being propagated in 
> by the linker-wrapper or not. I will check.
the file_type is for the file embedded by OffloadWrapper, therefore 
OffloadWrapper knows it.

Currently it is fatbin or cubin for CUDA and code object (elf) for HIP.

In the future, it may be spirv or bitcode to allow JIT compilation of spirv or 
bitcode in runtime so that one binary for all GPU's. OffloadWrapper will decide 
what is the final file type to embed based on type of embedded binaries in 
input files, target triple, and compile arguments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-28 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:246
   IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
+  // Create calls to __tgt_register_image_info for each image
+  auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());

saiislam wrote:
> jhuber6 wrote:
> > I'm wondering if it would be better to create a new `__tgt_bin_desc` and 
> > call a new `__tgt_register_lib` with it here so we don't need multiple 
> > calls here. Inside that new runtime function we could just widen or shrink 
> > the existing structs as needed. That way each device image would have this 
> > metadata associated with it and the target plugin can handle it as-needed.
> Last time multiple vendors objected to changing `__tgt_bin_desc` and 
> `__tgt_device_image` structs. The reason was backward compatibility of 
> multiple downstream runtimes.
If you keep the old `__tgt_register_lib` function, old applications will use 
that. Then inside libomptarget you can change `__tgt_register_lib` function to 
create a new `__tgt_device_image` from the old format. It would have some time 
penalty as we'd need to allocate some new structs, but it might be easier to 
deal with if each image contained its own metadata for the plugin to use. I was 
imagining it would look something like
```
struct __tgt_device_image_v2 {
  void* ImageStart;
  void* ImageEnd;
  __tgt_offload_entry*  EntriesBegin;
  __tgt_offload_entry*  EntriesEnd;
  __tgt_image_into *  ImageInfo;
};
```

Not sure if this is the best approach, but it should let us keep backwards 
compatibility and change the structs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-28 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam added a comment.

In D124525#3478469 , @yaxunl wrote:

> need a test for the generated registration code

Yes, I will add tests.




Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:1161
 
-LinkedImages.push_back(*ImageOrErr);
+LinkedImages.emplace_back(TheArch, *ImageOrErr);
   }

jhuber6 wrote:
> I'm doing something similar in D123810, I just used the existing `DeviceFile` 
> because I needed the `Arch` and `Kind` fields to dispatch the appropriate 
> wrapping job for CUDA / HIP.
Seems simpler. I will pull that change here.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:98-104
+  // struct __tgt_image_info {
+  //   int32_t version;
+  //   int32_t image_number;
+  //   int32_t number_images;
+  //   char* offload_arch;
+  //   char* target_compile_opts;
+  // };

yaxunl wrote:
> I am wondering whether we should add a few more fields to make it more 
> generic for all offloading languages and platforms:
> 
> 
> ```
> char* target_triple;
> char* offloading_kind; // openmp, hip, etc
> char* file_type; // elf, spirv, bitcode, etc
> ```
Good idea. Though I am not sure whether file_type info is being propagated in 
by the linker-wrapper or not. I will check.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:246
   IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
+  // Create calls to __tgt_register_image_info for each image
+  auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());

jhuber6 wrote:
> I'm wondering if it would be better to create a new `__tgt_bin_desc` and call 
> a new `__tgt_register_lib` with it here so we don't need multiple calls here. 
> Inside that new runtime function we could just widen or shrink the existing 
> structs as needed. That way each device image would have this metadata 
> associated with it and the target plugin can handle it as-needed.
Last time multiple vendors objected to changing `__tgt_bin_desc` and 
`__tgt_device_image` structs. The reason was backward compatibility of multiple 
downstream runtimes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-27 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

need a test for the generated registration code




Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:98-104
+  // struct __tgt_image_info {
+  //   int32_t version;
+  //   int32_t image_number;
+  //   int32_t number_images;
+  //   char* offload_arch;
+  //   char* target_compile_opts;
+  // };

I am wondering whether we should add a few more fields to make it more generic 
for all offloading languages and platforms:


```
char* target_triple;
char* offloading_kind; // openmp, hip, etc
char* file_type; // elf, spirv, bitcode, etc
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:1161
 
-LinkedImages.push_back(*ImageOrErr);
+LinkedImages.emplace_back(TheArch, *ImageOrErr);
   }

I'm doing something similar in D123810, I just used the existing `DeviceFile` 
because I needed the `Arch` and `Kind` fields to dispatch the appropriate 
wrapping job for CUDA / HIP.



Comment at: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp:246
   IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
+  // Create calls to __tgt_register_image_info for each image
+  auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());

I'm wondering if it would be better to create a new `__tgt_bin_desc` and call a 
new `__tgt_register_lib` with it here so we don't need multiple calls here. 
Inside that new runtime function we could just widen or shrink the existing 
structs as needed. That way each device image would have this metadata 
associated with it and the target plugin can handle it as-needed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-27 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: openmp/libomptarget/include/omptarget.h:163
+  int32_t image_number;  // Image number in image library starting from 0
+  int32_t number_images; // Number of images, used for initial allocation
+  char *offload_arch;// e.g. sm_30, sm_70, gfx906, includes features

I doubt we want/need to number and count them. This seems fragile and 
unhelpful. "initial allocation" is not expensive, image numbers should be 
implicit.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124525: [OpenMP][ClangLinkerWrapper] Extending linker wrapper to embed metadata for multi-arch fat binaries

2022-04-27 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam created this revision.
saiislam added reviewers: jdoerfert, JonChesterfield, jhuber6, yaxunl.
Herald added a subscriber: guansong.
Herald added a project: All.
saiislam requested review of this revision.
Herald added subscribers: openmp-commits, cfe-commits, sstefan1.
Herald added projects: clang, OpenMP.

This patch adds "__tgt_image_info" field for each of the images
embedded in a multi-arch image. Required changes in libomptarget
are also shown.

The information in "__tgt_image_info" struct is provided in the 
clang-linker-wrapper
as a call to __tgt_register_image_info for each image in the library
of images also created by the clang-linker-wrapper.
__tgt_register_image_info is called for each image BEFORE the single
call to __tgt_register_lib so that image information is available
before they are loaded. clang-linker-wrapper gets this image information
from command line arguments provided by the clang driver when it creates
the call to the __clang-linker-wrapper command.
This architecture allows the binary image (pointed to by ImageStart and
ImageEnd in __tgt_device_image) to remain architecture indenendent.
That is, the architecture independent part of the libomptarget runtime
does not need to peer inside the image to determine if it is loadable
even though in most cases the image is an elf object.
There is one __tgt_image_info for each __tgt_device_image. For backward
compabibility, no changes are allowed to either __tgt_device_image or
__tgt_bin_desc. The absense of __tgt_image_info is the indication that
the runtime is being used on a binary created by an old version of
the compiler.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/src/exports
  openmp/libomptarget/src/interface.cpp
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+#include "llvm/OffloadArch/OffloadArch.h"
 
 #include 
 #include 
@@ -20,6 +21,8 @@
 #include 
 #include 
 #include 
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include 
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,109 @@
 initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistant storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+ char *offload_arch_output_buffer,
+ size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env varible is present then use its value instead of
+  // querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+if (OffloadArchEnvVar) {
+  active_env->capabilities = OffloadArchEnvVar;
+  return;
+}
+  }
+  // Qget runtime capabilities of this system with libLLVMOffloadArch.a
+  if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  offload_arch_output_buffer_size))
+return;
+  active_env->capabilities = offload_arch_output_buffer;
+  return;
+}
+
+std::vector _splitstrings(char *input, const char *sep) {
+  std::vector split_strings;
+  std::string s(input);
+  std::string delimiter(sep);
+  size_t pos = 0;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+if (pos != 0)
+  split_strings.push_back(s.substr(0, pos));
+s.erase(0, pos + delimiter.length());
+  }
+  if (s.length() > 1)
+split_strings.push_back(s.substr(0, s.length()));
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *img_info,
+  __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!img_info)
+return true;
+
+  if (!active_env->capabilities)
+return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the img_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // actvie_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector reqs = _splitstrings(img_info->offload_arch, ":");
+  std::vector caps = _splitstrings(active_env->capabilities, ":");