This revision was automatically updated to reflect the committed changes.
yaxunl marked an inline comment as done.
Closed by commit rGdc6a0b0ec7e3: [HIP] Align device binary (authored by yaxunl).
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88734/new/

https://reviews.llvm.org/D88734

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/CodeGenCUDA/device-stub.cu
  clang/test/Driver/clang-offload-bundler.c
  clang/test/Driver/hip-toolchain-no-rdc.hip
  clang/test/Driver/hip-toolchain-rdc.hip
  clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp

Index: clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
===================================================================
--- clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -94,6 +94,11 @@
              "instead of actually executing them - for testing purposes.\n"),
     cl::init(false), cl::cat(ClangOffloadBundlerCategory));
 
+static cl::opt<unsigned>
+    BundleAlignment("bundle-align",
+                    cl::desc("Alignment of bundle for binary files"),
+                    cl::init(1), cl::cat(ClangOffloadBundlerCategory));
+
 /// Magic string that marks the existence of offloading data.
 #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
 
@@ -223,6 +228,9 @@
   StringMap<BundleInfo>::iterator CurBundleInfo;
   StringMap<BundleInfo>::iterator NextBundleInfo;
 
+  /// Current bundle target to be written.
+  std::string CurWriteBundleTarget;
+
 public:
   BinaryFileHandler() : FileHandler() {}
 
@@ -337,10 +345,12 @@
     unsigned Idx = 0;
     for (auto &T : TargetNames) {
       MemoryBuffer &MB = *Inputs[Idx++];
+      HeaderSize = alignTo(HeaderSize, BundleAlignment);
       // Bundle offset.
       Write8byteIntegerToBuffer(OS, HeaderSize);
       // Size of the bundle (adds to the next bundle's offset)
       Write8byteIntegerToBuffer(OS, MB.getBufferSize());
+      BundlesInfo[T] = BundleInfo(MB.getBufferSize(), HeaderSize);
       HeaderSize += MB.getBufferSize();
       // Size of the triple
       Write8byteIntegerToBuffer(OS, T.size());
@@ -351,6 +361,7 @@
   }
 
   Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+    CurWriteBundleTarget = TargetTriple.str();
     return Error::success();
   }
 
@@ -359,6 +370,8 @@
   }
 
   Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+    auto BI = BundlesInfo[CurWriteBundleTarget];
+    OS.seek(BI.Offset);
     OS.write(Input.getBufferStart(), Input.getBufferSize());
     return Error::success();
   }
Index: clang/test/Driver/hip-toolchain-rdc.hip
===================================================================
--- clang/test/Driver/hip-toolchain-rdc.hip
+++ clang/test/Driver/hip-toolchain-rdc.hip
@@ -8,10 +8,14 @@
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \
 // RUN:   -fuse-ld=lld -fgpu-rdc -nogpuinc \
+// RUN:   -fhip-dump-offload-linker-script \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s
 
+// check code object alignment in dumped llvm-mc input
+// CHECK: .p2align 12
+
 // emit objects for host side path
 // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
@@ -87,6 +91,7 @@
 
 // combine images generated into hip fat binary object
 // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// CHECK-SAME: "-bundle-align=4096"
 // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
 // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]"
 
Index: clang/test/Driver/hip-toolchain-no-rdc.hip
===================================================================
--- clang/test/Driver/hip-toolchain-no-rdc.hip
+++ clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -81,6 +81,7 @@
 //
 
 // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// CHECK-SAME: "-bundle-align=4096"
 // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
 // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV_A_803]],[[IMG_DEV_A_900]]" "-outputs=[[BUNDLE_A:.*hipfb]]"
 
@@ -143,6 +144,7 @@
 //
 
 // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// CHECK-SAME: "-bundle-align=4096"
 // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
 // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV_B_803]],[[IMG_DEV_B_900]]" "-outputs=[[BUNDLE_A:.*hipfb]]"
 
Index: clang/test/Driver/clang-offload-bundler.c
===================================================================
--- clang/test/Driver/clang-offload-bundler.c
+++ clang/test/Driver/clang-offload-bundler.c
@@ -278,6 +278,16 @@
 // RUN: diff %t.empty %t.res.tgt1
 // RUN: diff %t.empty %t.res.tgt2
 
+//
+// Check -bundle-align option
+//
+
+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -inputs=%t.bc,%t.tgt1,%t.tgt2 -outputs=%t.bundle3.bc
+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.bc,%t.res.tgt1,%t.res.tgt2 -inputs=%t.bundle3.bc -unbundle
+// RUN: diff %t.bc %t.res.bc
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+
 // Some code so that we can create a binary out of this file.
 int A = 0;
 void test_func(void) {
Index: clang/test/CodeGenCUDA/device-stub.cu
===================================================================
--- clang/test/CodeGenCUDA/device-stub.cu
+++ clang/test/CodeGenCUDA/device-stub.cu
@@ -115,7 +115,7 @@
 // ALL: @4 = private unnamed_addr constant [21 x i8] c"ext_constant_var_def\00"
 // * constant unnamed string with GPU binary
 // CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
-// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
+// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",{{.*}}align 4096
 // HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
 // CUDANORDC-SAME: section ".nv_fatbin", align 8
 // CUDARDC-SAME: section "__nv_relfatbin", align 8
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -16,6 +16,7 @@
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TargetParser.h"
@@ -33,6 +34,7 @@
 #endif
 
 namespace {
+const unsigned HIPCodeObjectAlign = 4096;
 
 static void addBCLib(const Driver &D, const ArgList &Args,
                      ArgStringList &CmdArgs, ArgStringList LibraryPaths,
@@ -108,6 +110,8 @@
   // for different GPU archs.
   ArgStringList BundlerArgs;
   BundlerArgs.push_back(Args.MakeArgString("-type=o"));
+  BundlerArgs.push_back(
+      Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign)));
 
   // ToDo: Remove the dummy host binary entry which is required by
   // clang-offload-bundler.
@@ -175,7 +179,8 @@
   ObjStream << "  .section .hip_fatbin,\"aMS\",@progbits,1\n";
   ObjStream << "  .data\n";
   ObjStream << "  .globl __hip_fatbin\n";
-  ObjStream << "  .p2align 3\n";
+  ObjStream << "  .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign))
+            << "\n";
   ObjStream << "__hip_fatbin:\n";
   ObjStream << "  .incbin \"" << BundleFile << "\"\n";
   ObjStream.flush();
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -597,8 +597,10 @@
     if (CudaGpuBinary) {
       // If fatbin is available from early finalization, create a string
       // literal containing the fat binary loaded from the given file.
-      FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()),
-                                     "", FatbinConstantName, 8);
+      const unsigned HIPCodeObjectAlign = 4096;
+      FatBinStr =
+          makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
+                             FatbinConstantName, HIPCodeObjectAlign);
     } else {
       // If fatbin is not available, create an external symbol
       // __hip_fatbin in section .hip_fatbin. The external symbol is supposed
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to