[clang] [lld] [flang] [llvm] [AMDGPU] Introduce Code Object V6 (PR #76954)

2024-01-24 Thread Konstantin Zhuravlyov via cfe-commits


@@ -620,6 +620,15 @@ void ScalarBitSetTraits::bitset(IO &IO,
   BCase(EF_AMDGPU_FEATURE_XNACK_V3);
   BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
   break;
+case ELF::ELFABIVERSION_AMDGPU_HSA_V6:

kzhuravl wrote:

I think we'd need to add a test for this?

https://github.com/llvm/llvm-project/pull/76954
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [lld] [flang] [llvm] [AMDGPU] Introduce Code Object V6 (PR #76954)

2024-01-07 Thread Pierre van Houtryve via cfe-commits

https://github.com/Pierre-vh updated 
https://github.com/llvm/llvm-project/pull/76954

>From dc666323870118020c0fd386d19d8306d4c853e1 Mon Sep 17 00:00:00 2001
From: pvanhout 
Date: Thu, 4 Jan 2024 14:12:00 +0100
Subject: [PATCH 1/2] [AMDGPU] Introduce Code Object V6

Introduce Code Object V6 in Clang, LLD, Flang and LLVM.
This is the same as V5 except a new "generic version" flag can be present in 
EFLAGS. This is related to new generic targets that'll be added in a follow-up 
patch. It's also likely V6 will have new changes (possibly new metadata 
entries) added later.

Docs changes are not included; I'm planning to do them in a follow-up patch all 
at once (when generic targets land too).
---
 clang/include/clang/Driver/Options.td |   4 +-
 clang/lib/CodeGen/CGBuiltin.cpp   |   6 +-
 clang/lib/Driver/ToolChains/CommonArgs.cpp|   2 +-
 .../amdgpu-code-object-version-linking.cu |  37 +++
 .../CodeGenCUDA/amdgpu-code-object-version.cu |   4 +
 .../test/CodeGenCUDA/amdgpu-workgroup-size.cu |   4 +
 .../amdgcn/bitcode/oclc_abi_version_600.bc|   0
 clang/test/Driver/hip-code-object-version.hip |  12 +
 clang/test/Driver/hip-device-libs.hip |  18 +-
 flang/lib/Frontend/CompilerInvocation.cpp |   2 +
 flang/test/Lower/AMD/code-object-version.f90  |   3 +-
 lld/ELF/Arch/AMDGPU.cpp   |  22 ++
 lld/test/ELF/amdgpu-tid.s |  16 ++
 llvm/include/llvm/BinaryFormat/ELF.h  |  12 +-
 llvm/include/llvm/Support/AMDGPUMetadata.h|   5 +
 llvm/include/llvm/Support/ScopedPrinter.h |   4 +-
 llvm/include/llvm/Target/TargetOptions.h  |   1 +
 llvm/lib/ObjectYAML/ELFYAML.cpp   |   6 +
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |   3 +
 .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp  |  10 +
 .../Target/AMDGPU/AMDGPUHSAMetadataStreamer.h |  11 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp |  27 +++
 .../MCTargetDesc/AMDGPUTargetStreamer.h   |   1 +
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp|  13 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   5 +-
 ...licit-kernarg-backend-usage-global-isel.ll |   2 +
 .../AMDGPU/call-graph-register-usage.ll   |   1 +
 .../AMDGPU/codegen-internal-only-func.ll  |   2 +
 llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll  |   4 +
 .../enable-scratch-only-dynamic-stack.ll  |   1 +
 .../AMDGPU/implicit-kernarg-backend-usage.ll  |   2 +
 .../AMDGPU/implicitarg-offset-attributes.ll   |  46 
 .../AMDGPU/llvm.amdgcn.implicitarg.ptr.ll |   1 +
 llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll  |   1 +
 llvm/test/CodeGen/AMDGPU/recursion.ll |   1 +
 .../AMDGPU/resource-usage-dead-function.ll|   1 +
 .../AMDGPU/tid-mul-func-xnack-all-any.ll  |   6 +
 .../tid-mul-func-xnack-all-not-supported.ll   |   6 +
 .../AMDGPU/tid-mul-func-xnack-all-off.ll  |   6 +
 .../AMDGPU/tid-mul-func-xnack-all-on.ll   |   6 +
 .../AMDGPU/tid-mul-func-xnack-any-off-1.ll|   6 +
 .../AMDGPU/tid-mul-func-xnack-any-off-2.ll|   6 +
 .../AMDGPU/tid-mul-func-xnack-any-on-1.ll |   6 +
 .../AMDGPU/tid-mul-func-xnack-any-on-2.ll |   6 +
 .../tid-one-func-xnack-not-supported.ll   |   6 +
 .../CodeGen/AMDGPU/tid-one-func-xnack-off.ll  |   6 +
 .../CodeGen/AMDGPU/tid-one-func-xnack-on.ll   |   6 +
 .../MC/AMDGPU/hsa-v5-uses-dynamic-stack.s |   5 +
 llvm/tools/llvm-readobj/ELFDumper.cpp | 222 --
 49 files changed, 448 insertions(+), 135 deletions(-)
 create mode 100644 
clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 2b93ddf033499c..0bfe0e7739960e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4753,9 +4753,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, 
Group<m_Group>,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
   Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
-  Values<"none,4,5">,
+  Values<"none,4,5,6">,
   NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
-  NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
+  NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
   MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
 
 defm cumode : SimpleMFlag<"cumode",
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f71dbf1729a1d6..be86731ed912ea 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17481,9 +17481,9 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
 /// Emit code based on Code Object ABI version.
 /// COV_4: Emit code to use dispatch ptr
-/// COV_5: Emit code to use implicitarg ptr
+/// COV_5+   : Emit code to use implicitarg ptr
 /// COV_NONE : Emit code to load a global variable "__oclc_ABI_versi