[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-19 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 532748.
jhuber6 added a comment.

I'm not sure why this keeps failing on Windows and have no clue how to tell 
what's going wrong. The builder simply says

  c:\ws\w4\llvm-project\premerge-checks\build\bin\clang-linker-wrapper.exe: 
error: invalid argument

But I'm unsure what could be causing that since it works on Linux and we have 
plenty of other tests that Windows passes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the folowing BNF syntax:
+  //
+  //  target-id ::= ' ( :  ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this compilation. The input
+  // may be an AMDGPU target-id so we split off anything before the colon.
   const OptTable &Tbl = getOptTable();
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
+   Args.MakeArgString(
+   Input.front().getBinary()->getArch().split(':').first));
+  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
@@ -1001,23 +1005,13 @@
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered b

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-15 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 531791.
jhuber6 added a comment.

Updating


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the folowing BNF syntax:
+  //
+  //  target-id ::= ' ( :  ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this compilation. The input
+  // may be an AMDGPU target-id so we split off anything before the colon.
   const OptTable &Tbl = getOptTable();
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
+   Args.MakeArgString(
+   Input.front().getBinary()->getArch().split(':').first));
+  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
@@ -1001,23 +1005,13 @@
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered by the runtime.
 Expected>
-linkAndWrapDeviceFiles(SmallVectorImpl &LinkerInputFiles,
+linkAndWrapDeviceFiles(SmallVector> &LinkerInputFiles,
const InputArgList &Args, char **Argv, int Argc) {
   llvm::TimeTraceScope TimeScope("Handle all device input");
 
-  DenseMap> InputMap;
-  for (auto &File : LinkerInputFi

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-15 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/test/Driver/linker-wrapper.c:48
+// AMDGPU-LINK-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx90a -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
+
 // RUN: clang-offload-packager -o %t.out \

yaxunl wrote:
> can we put some variables in the input bitcode so that we can check the 
> linked bitcode?
> 
> I would expect there will be only one linked bitcode for gfx90a:xnack+ and it 
> contains both variables.
> 
> I don't think it is a good idea to let the final object embed bitcode for 
> both gfx90a:xnack+ and gfx90a since that will result in an invalid container. 
> Therefore I think we should only do linking with target ID's from the first 
> container.
I can make a static library that's `gfx90a`, that covers the main case where we 
still link in the OpenMP runtime library that's compiled with `90a` if the user 
uses `90a:xnack+`. I'd need to place a random external variable to force it to 
extract however.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: clang/test/Driver/linker-wrapper.c:48
+// AMDGPU-LINK-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx90a -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
+
 // RUN: clang-offload-packager -o %t.out \

can we put some variables in the input bitcode so that we can check the linked 
bitcode?

I would expect there will be only one linked bitcode for gfx90a:xnack+ and it 
contains both variables.

I don't think it is a good idea to let the final object embed bitcode for both 
gfx90a:xnack+ and gfx90a since that will result in an invalid container. 
Therefore I think we should only do linking with target ID's from the first 
container.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-15 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 531756.
jhuber6 added a comment.

Hopefully fixing test on Windows. I think it's fine to let the packager bundle
mutliple of these now since it's caught in `clang`. So if the user really wants
to force it we should allow them to since the bundler format is just a simple
fatbinary.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the folowing BNF syntax:
+  //
+  //  target-id ::= ' ( :  ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this compilation. The input
+  // may be an AMDGPU target-id so we split off anything before the colon.
   const OptTable &Tbl = getOptTable();
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
+   Args.MakeArgString(
+   Input.front().getBinary()->getArch().split(':').first));
+  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
@@ -1001,23 +1005,13 @@
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered by the runtime.
 Expected>
-linkAndWrapDeviceFiles(SmallVectorImpl &LinkerInputFiles,
+linkAndWrapD

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D152882#4422797 , @yaxunl wrote:

> In D152882#4422788 , @jhuber6 wrote:
>
>> In D152882#4421138 , @yaxunl wrote:
>>
>>> However, bitcode of  target ID gfx90a:xnack+ is allowed to link in bitcode 
>>> of target ID gfx90a as long as they are from different containers. So there 
>>> are two rules about target ID: 1. compatibility rules for objects/bitcode 
>>> in the same container 2. compatibility rules for linking bitcode of 
>>> different target ID's.
>>>
>>> we need tests for both rules.
>>
>> So I'm wondering why I'm allowed to do 
>> `--offload-arch=gfx90a,gfx90a:xnack+`. Shouldn't that be caught by 
>> `getConflictTargetIDCombination`? That seems like the proper place to 
>> diagnose this.
>
> clang --offload-arch=gfx90a,gfx90a:xnack+ -c a.hip
> clang: error: invalid offload arch combinations: 'gfx90a' and 'gfx90a:xnack+' 
> (for a specific processor, a feature should either exist in all offload 
> archs, or not exist in any offload archs)
>
> At least it is caught for HIP. OpenMP may not check that.



  if (Kind != Action::OFK_HIP)
return std::nullopt;

Yes, this would be the culprit. Guessing we shouldn't do that?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D152882#4422788 , @jhuber6 wrote:

> In D152882#4421138 , @yaxunl wrote:
>
>> However, bitcode of  target ID gfx90a:xnack+ is allowed to link in bitcode 
>> of target ID gfx90a as long as they are from different containers. So there 
>> are two rules about target ID: 1. compatibility rules for objects/bitcode in 
>> the same container 2. compatibility rules for linking bitcode of different 
>> target ID's.
>>
>> we need tests for both rules.
>
> So I'm wondering why I'm allowed to do `--offload-arch=gfx90a,gfx90a:xnack+`. 
> Shouldn't that be caught by `getConflictTargetIDCombination`? That seems like 
> the proper place to diagnose this.

clang --offload-arch=gfx90a,gfx90a:xnack+ -c a.hip
clang: error: invalid offload arch combinations: 'gfx90a' and 'gfx90a:xnack+' 
(for a specific processor, a feature should either exist in all offload archs, 
or not exist in any offload archs)

At least it is caught for HIP. OpenMP may not check that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D152882#4421138 , @yaxunl wrote:

> However, bitcode of  target ID gfx90a:xnack+ is allowed to link in bitcode of 
> target ID gfx90a as long as they are from different containers. So there are 
> two rules about target ID: 1. compatibility rules for objects/bitcode in the 
> same container 2. compatibility rules for linking bitcode of different target 
> ID's.
>
> we need tests for both rules.

So I'm wondering why I'm allowed to do `--offload-arch=gfx90a,gfx90a:xnack+`. 
Shouldn't that be caught by `getConflictTargetIDCombination`? That seems like 
the proper place to diagnose this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 531520.
jhuber6 added a comment.
Herald added subscribers: kerbowa, jvesely.

Adjusting test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the folowing BNF syntax:
+  //
+  //  target-id ::= ' ( :  ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this compilation. The input
+  // may be an AMDGPU target-id so we split off anything before the colon.
   const OptTable &Tbl = getOptTable();
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
+   Args.MakeArgString(
+   Input.front().getBinary()->getArch().split(':').first));
+  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
@@ -1001,23 +1005,13 @@
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered by the runtime.
 Expected>
-linkAndWrapDeviceFiles(SmallVectorImpl &LinkerInputFiles,
+linkAndWrapDeviceFiles(SmallVector> &LinkerInputFiles,
const InputArgList &Args, char **Argv, int Argc) {
   llvm::TimeTraceScope TimeScope("Handle all device input");
 
-  Den

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D152882#4421138 , @yaxunl wrote:

> The design of target ID put constraints on target ID's that can be embedded 
> into one executable 
> https://clang.llvm.org/docs/ClangOffloadBundler.html#bundle-entry-id . For 
> example, gfx90a and gfx90a:xnack+ cannot be embedded into one executable 
> since this will cause difficulty for runtime to choose device binary to run, 
> especially when there are multiple target ID features. clang does not allow 
> --offload-arch=gfx90a and --offload-arch=gfx90a:xnack+ to be used together to 
> compile HIP programs. It would be preferred for offloack-packager to enforce 
> this constraint too.
>
> However, bitcode of  target ID gfx90a:xnack+ is allowed to link in bitcode of 
> target ID gfx90a as long as they are from different containers. So there are 
> two rules about target ID: 1. compatibility rules for objects/bitcode in the 
> same container 2. compatibility rules for linking bitcode of different target 
> ID's.
>
> we need tests for both rules.

Should that be a follow-up patch? Or one included here. It definitely 
influences the test so I can change that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

The design of target ID put constraints on target ID's that can be embedded 
into one executable 
https://clang.llvm.org/docs/ClangOffloadBundler.html#bundle-entry-id . For 
example, gfx90a and gfx90a:xnack+ cannot be embedded into one executable since 
this will cause difficulty for runtime to choose device binary to run, 
especially when there are multiple target ID features. clang does not allow 
--offload-arch=gfx90a and --offload-arch=gfx90a:xnack+ to be used together to 
compile HIP programs. It would be preferred for offloack-packager to enforce 
this constraint too.

However, bitcode of  target ID gfx90a:xnack+ is allowed to link in bitcode of 
target ID gfx90a as long as they are from different containers. So there are 
two rules about target ID: 1. compatibility rules for objects/bitcode in the 
same container 2. compatibility rules for linking bitcode of different target 
ID's.

we need tests for both rules.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, tra, yaxunl.
Herald added a subscriber: tpr.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, jplehr, sstefan1, MaskRay.
Herald added a reviewer: jdoerfert.
Herald added projects: clang, LLVM.

Currently the linker wrapper strictly assigns a single input binary to a
single link job based off of its input architecture. This is not
sufficient to implement the AMDGPU target ID correctly as this could
have many compatible architectures participating in multiple links.

This patch introduces the ability to have a single binary input be
linked multiple times. For example, given the following, we will now
link in the static library where previously we would not.

  clang foo.c -fopenmp --offload-arch=gfx90a
  llvm-ar rcs libfoo.a foo.o
  clang foo.c -fopenmp --offload-arch=gfx90a:xnack+ libfoo.a

This also means that given the following we will link the basic input
twice, but that's on the user for providing two versions.

  clang foo.c -fopenmp --offload-arch=gfx90a,gfx90a:xnack+

This should allow us to also support a "generic" target in the future
for IR without a specific architecture.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the folowing BNF syntax:
+  //
+  //  target-id ::= ' ( :  ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this comp