from:"Joseph Huber via Phabricator via cfe-commits"

[PATCH] D154036: [libc] Add support for creating wrapper headers for offloading in clang

2023-06-29 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 535983.
jhuber6 added a comment.

Add some checks to `stdlib.h` to ensure ABI compatibility for `div` functions.

Because this patch makes us always include the LLVM libc repo when offloading,
we mask off the wrappers if they were not installed by `libc`, so this is simply
a passthrough include header if the user didn't build with the LLVM C library
for the GPU.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154036/new/

https://reviews.llvm.org/D154036

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/llvm_libc_wrappers/ctype.h
  clang/lib/Headers/llvm_libc_wrappers/llvm-libc-decls/README.txt
  clang/lib/Headers/llvm_libc_wrappers/stdio.h
  clang/lib/Headers/llvm_libc_wrappers/stdlib.h
  clang/lib/Headers/llvm_libc_wrappers/string.h
  clang/test/Driver/gpu-libc-headers.c
  libc/cmake/modules/LLVMLibCHeaderRules.cmake
  libc/include/CMakeLists.txt
  libc/utils/HdrGen/Generator.cpp
  libc/utils/HdrGen/Generator.h
  libc/utils/HdrGen/Main.cpp

Index: libc/utils/HdrGen/Main.cpp
===
--- libc/utils/HdrGen/Main.cpp
+++ libc/utils/HdrGen/Main.cpp
@@ -32,6 +32,9 @@
 llvm::cl::list ReplacementValues(
 "args", llvm::cl::desc("Command separated = pairs."),
 llvm::cl::value_desc("[,name=value]"));
+llvm::cl::opt ExportDecls(
+"export-decls",
+llvm::cl::desc("Output a new header containing only the entrypoints."));
 
 void ParseArgValuePairs(std::unordered_map &Map) {
   for (std::string &R : ReplacementValues) {
@@ -48,7 +51,10 @@
   std::unordered_map ArgMap;
   ParseArgValuePairs(ArgMap);
   Generator G(HeaderDefFile, EntrypointNamesOption, StandardHeader, ArgMap);
-  G.generate(OS, Records);
+  if (ExportDecls)
+G.generateDecls(OS, Records);
+  else
+G.generate(OS, Records);
 
   return false;
 }
Index: libc/utils/HdrGen/Generator.h
===
--- libc/utils/HdrGen/Generator.h
+++ libc/utils/HdrGen/Generator.h
@@ -52,6 +52,7 @@
 ArgMap(Map) {}
 
   void generate(llvm::raw_ostream &OS, llvm::RecordKeeper &Records);
+  void generateDecls(llvm::raw_ostream &OS, llvm::RecordKeeper &Records);
 };
 
 } // namespace llvm_libc
Index: libc/utils/HdrGen/Generator.cpp
===
--- libc/utils/HdrGen/Generator.cpp
+++ libc/utils/HdrGen/Generator.cpp
@@ -10,6 +10,7 @@
 
 #include "IncludeFileCommand.h"
 #include "PublicAPICommand.h"
+#include "utils/LibcTableGenUtil/APIIndexer.h"
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -116,4 +117,78 @@
   }
 }
 
+void Generator::generateDecls(llvm::raw_ostream &OS,
+  llvm::RecordKeeper &Records) {
+
+  OS << "//===-- C standard declarations for " << StdHeader << " "
+ << std::string(80 - (42 + StdHeader.size()), '-') << "===//\n"
+ << "//\n"
+ << "// Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+"Exceptions.\n"
+ << "// See https://llvm.org/LICENSE.txt for license information.\n"
+ << "// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ << "//\n"
+ << "//"
+"===---"
+"---===//\n\n";
+
+  std::string HeaderGuard(StdHeader.size(), '\0');
+  llvm::transform(StdHeader, HeaderGuard.begin(), [](const char C) -> char {
+return !isalnum(C) ? '_' : llvm::toUpper(C);
+  });
+  OS << "#ifndef __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n"
+ << "#define __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n\n";
+
+  OS << "#ifndef __LIBC_ATTRS\n"
+ << "#define __LIBC_ATTRS\n"
+ << "#endif\n\n";
+
+  OS << "#ifdef __cplusplus\n"
+ << "extern \"C\" {\n"
+ << "#endif\n\n";
+
+  APIIndexer G(StdHeader, Records);
+  for (auto &Name : EntrypointNameList) {
+// Filter out functions not exported by this header.
+if (G.FunctionSpecMap.find(Name) == G.FunctionSpecMap.end())
+  continue;
+
+llvm::Record *FunctionSpec = G.FunctionSpecMap[Name];
+llvm::Record *RetValSpec = FunctionSpec->getValueAsDef("Return");
+llvm::Record *ReturnType = RetValSpec->getValueAsDef("ReturnType");
+
+OS << G.getTypeAsString(ReturnType) << " " << Name << "(";
+
+auto ArgsList = FunctionSpec->getValueAsListOfDefs("Args");
+for (size_t i = 0; i < ArgsList.size(); ++i) {
+  llvm::Record *ArgType = ArgsList[i]->getValueAsDef("ArgType");
+  OS << G.getTypeAsString(ArgType);
+  if (i < ArgsList.size() - 1)
+OS << ", ";
+}
+
+OS << ") __LIBC_ATTRS;\n\n";
+  }
+
+  // Make another pass over entrypoints to emit object declarations.
+  for (const auto &Name : EntrypointNameList) {
+if (G.ObjectSpecMap.find(Name) == G.ObjectSpecMap.end())
+  continue;
+llvm::Record *ObjectSpec = G.ObjectSpecMap[Name]

[PATCH] D154145: [HIP] Fix -mllvm option for device lld linker

2023-06-29 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 accepted this revision.
jhuber6 added a comment.
This revision is now accepted and ready to land.

LG, thanks.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154145/new/

https://reviews.llvm.org/D154145

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D154036: [libc] Add support for creating wrapper headers for offloading in clang

2023-06-29 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 535835.
jhuber6 added a comment.

Semi-fix hack for `string.h` and fix `ctype.h`. `string.h` required undefining
C++ mode so we didn't use weird GNU C++ handling, which we then still need the
`extern "C"` for. The `ctype.h` problems come from GNU defining everything as a
macro so it fails to redeclare.

The number of hacks that this alone has required so far is fairly convincing to
me that this is the more correct solution and should be separate from `libc`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154036/new/

https://reviews.llvm.org/D154036

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/llvm_libc_wrappers/ctype.h
  clang/lib/Headers/llvm_libc_wrappers/llvm-libc-decls/README.txt
  clang/lib/Headers/llvm_libc_wrappers/stdio.h
  clang/lib/Headers/llvm_libc_wrappers/stdlib.h
  clang/lib/Headers/llvm_libc_wrappers/string.h
  clang/test/Driver/gpu-libc-headers.c
  libc/cmake/modules/LLVMLibCHeaderRules.cmake
  libc/include/CMakeLists.txt
  libc/utils/HdrGen/Generator.cpp
  libc/utils/HdrGen/Generator.h
  libc/utils/HdrGen/Main.cpp

Index: libc/utils/HdrGen/Main.cpp
===
--- libc/utils/HdrGen/Main.cpp
+++ libc/utils/HdrGen/Main.cpp
@@ -32,6 +32,9 @@
 llvm::cl::list ReplacementValues(
 "args", llvm::cl::desc("Command separated = pairs."),
 llvm::cl::value_desc("[,name=value]"));
+llvm::cl::opt ExportDecls(
+"export-decls",
+llvm::cl::desc("Output a new header containing only the entrypoints."));
 
 void ParseArgValuePairs(std::unordered_map &Map) {
   for (std::string &R : ReplacementValues) {
@@ -48,7 +51,10 @@
   std::unordered_map ArgMap;
   ParseArgValuePairs(ArgMap);
   Generator G(HeaderDefFile, EntrypointNamesOption, StandardHeader, ArgMap);
-  G.generate(OS, Records);
+  if (ExportDecls)
+G.generateDecls(OS, Records);
+  else
+G.generate(OS, Records);
 
   return false;
 }
Index: libc/utils/HdrGen/Generator.h
===
--- libc/utils/HdrGen/Generator.h
+++ libc/utils/HdrGen/Generator.h
@@ -52,6 +52,7 @@
 ArgMap(Map) {}
 
   void generate(llvm::raw_ostream &OS, llvm::RecordKeeper &Records);
+  void generateDecls(llvm::raw_ostream &OS, llvm::RecordKeeper &Records);
 };
 
 } // namespace llvm_libc
Index: libc/utils/HdrGen/Generator.cpp
===
--- libc/utils/HdrGen/Generator.cpp
+++ libc/utils/HdrGen/Generator.cpp
@@ -10,6 +10,7 @@
 
 #include "IncludeFileCommand.h"
 #include "PublicAPICommand.h"
+#include "utils/LibcTableGenUtil/APIIndexer.h"
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -116,4 +117,78 @@
   }
 }
 
+void Generator::generateDecls(llvm::raw_ostream &OS,
+  llvm::RecordKeeper &Records) {
+
+  OS << "//===-- C standard declarations for " << StdHeader << " "
+ << std::string(80 - (42 + StdHeader.size()), '-') << "===//\n"
+ << "//\n"
+ << "// Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+"Exceptions.\n"
+ << "// See https://llvm.org/LICENSE.txt for license information.\n"
+ << "// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ << "//\n"
+ << "//"
+"===---"
+"---===//\n\n";
+
+  std::string HeaderGuard(StdHeader.size(), '\0');
+  llvm::transform(StdHeader, HeaderGuard.begin(), [](const char C) -> char {
+return !isalnum(C) ? '_' : llvm::toUpper(C);
+  });
+  OS << "#ifndef __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n"
+ << "#define __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n\n";
+
+  OS << "#ifndef __LIBC_ATTRS\n"
+ << "#define __LIBC_ATTRS\n"
+ << "#endif\n\n";
+
+  OS << "#ifdef __cplusplus\n"
+ << "extern \"C\" {\n"
+ << "#endif\n\n";
+
+  APIIndexer G(StdHeader, Records);
+  for (auto &Name : EntrypointNameList) {
+// Filter out functions not exported by this header.
+if (G.FunctionSpecMap.find(Name) == G.FunctionSpecMap.end())
+  continue;
+
+llvm::Record *FunctionSpec = G.FunctionSpecMap[Name];
+llvm::Record *RetValSpec = FunctionSpec->getValueAsDef("Return");
+llvm::Record *ReturnType = RetValSpec->getValueAsDef("ReturnType");
+
+OS << G.getTypeAsString(ReturnType) << " " << Name << "(";
+
+auto ArgsList = FunctionSpec->getValueAsListOfDefs("Args");
+for (size_t i = 0; i < ArgsList.size(); ++i) {
+  llvm::Record *ArgType = ArgsList[i]->getValueAsDef("ArgType");
+  OS << G.getTypeAsString(ArgType);
+  if (i < ArgsList.size() - 1)
+OS << ", ";
+}
+
+OS << ") __LIBC_ATTRS;\n\n";
+  }
+
+  // Make another pass over entrypoints to emit object declarations.
+  for (const auto &Name : EntrypointNameList) {
+if (G.ObjectSpecMap.find(Name)

[PATCH] D154036: [libc] Add support for creating wrapper headers for offloading in clang

2023-06-28 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 535600.
jhuber6 added a comment.

Hack around the `string` problem. GNU likes to provide different prototypes for 
C++. Manually disable this for now. Unsure if this will have reasonable 
fallout, but it seems bizarre that `string.h` would define C++ constructs?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154036/new/

https://reviews.llvm.org/D154036

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/llvm_libc_wrappers/ctype.h
  clang/lib/Headers/llvm_libc_wrappers/llvm-libc-decls/README.txt
  clang/lib/Headers/llvm_libc_wrappers/stdio.h
  clang/lib/Headers/llvm_libc_wrappers/stdlib.h
  clang/lib/Headers/llvm_libc_wrappers/string.h
  clang/test/Driver/gpu-libc-headers.c
  libc/cmake/modules/LLVMLibCHeaderRules.cmake
  libc/include/CMakeLists.txt
  libc/utils/HdrGen/Generator.cpp
  libc/utils/HdrGen/Generator.h
  libc/utils/HdrGen/Main.cpp

Index: libc/utils/HdrGen/Main.cpp
===
--- libc/utils/HdrGen/Main.cpp
+++ libc/utils/HdrGen/Main.cpp
@@ -32,6 +32,9 @@
 llvm::cl::list ReplacementValues(
 "args", llvm::cl::desc("Command separated = pairs."),
 llvm::cl::value_desc("[,name=value]"));
+llvm::cl::opt ExportDecls(
+"export-decls",
+llvm::cl::desc("Output a new header containing only the entrypoints."));
 
 void ParseArgValuePairs(std::unordered_map &Map) {
   for (std::string &R : ReplacementValues) {
@@ -48,7 +51,10 @@
   std::unordered_map ArgMap;
   ParseArgValuePairs(ArgMap);
   Generator G(HeaderDefFile, EntrypointNamesOption, StandardHeader, ArgMap);
-  G.generate(OS, Records);
+  if (ExportDecls)
+G.generateDecls(OS, Records);
+  else
+G.generate(OS, Records);
 
   return false;
 }
Index: libc/utils/HdrGen/Generator.h
===
--- libc/utils/HdrGen/Generator.h
+++ libc/utils/HdrGen/Generator.h
@@ -52,6 +52,7 @@
 ArgMap(Map) {}
 
   void generate(llvm::raw_ostream &OS, llvm::RecordKeeper &Records);
+  void generateDecls(llvm::raw_ostream &OS, llvm::RecordKeeper &Records);
 };
 
 } // namespace llvm_libc
Index: libc/utils/HdrGen/Generator.cpp
===
--- libc/utils/HdrGen/Generator.cpp
+++ libc/utils/HdrGen/Generator.cpp
@@ -10,6 +10,7 @@
 
 #include "IncludeFileCommand.h"
 #include "PublicAPICommand.h"
+#include "utils/LibcTableGenUtil/APIIndexer.h"
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -116,4 +117,78 @@
   }
 }
 
+void Generator::generateDecls(llvm::raw_ostream &OS,
+  llvm::RecordKeeper &Records) {
+
+  OS << "//===-- C standard declarations for " << StdHeader << " "
+ << std::string(80 - (42 + StdHeader.size()), '-') << "===//\n"
+ << "//\n"
+ << "// Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+"Exceptions.\n"
+ << "// See https://llvm.org/LICENSE.txt for license information.\n"
+ << "// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ << "//\n"
+ << "//"
+"===---"
+"---===//\n\n";
+
+  std::string HeaderGuard(StdHeader.size(), '\0');
+  llvm::transform(StdHeader, HeaderGuard.begin(), [](const char C) -> char {
+return !isalnum(C) ? '_' : llvm::toUpper(C);
+  });
+  OS << "#ifndef __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n"
+ << "#define __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n\n";
+
+  OS << "#ifndef __LIBC_ATTRS\n"
+ << "#define __LIBC_ATTRS\n"
+ << "#endif\n\n";
+
+  OS << "#ifdef __cplusplus\n"
+ << "extern \"C\" {\n"
+ << "#endif\n\n";
+
+  APIIndexer G(StdHeader, Records);
+  for (auto &Name : EntrypointNameList) {
+// Filter out functions not exported by this header.
+if (G.FunctionSpecMap.find(Name) == G.FunctionSpecMap.end())
+  continue;
+
+llvm::Record *FunctionSpec = G.FunctionSpecMap[Name];
+llvm::Record *RetValSpec = FunctionSpec->getValueAsDef("Return");
+llvm::Record *ReturnType = RetValSpec->getValueAsDef("ReturnType");
+
+OS << G.getTypeAsString(ReturnType) << " " << Name << "(";
+
+auto ArgsList = FunctionSpec->getValueAsListOfDefs("Args");
+for (size_t i = 0; i < ArgsList.size(); ++i) {
+  llvm::Record *ArgType = ArgsList[i]->getValueAsDef("ArgType");
+  OS << G.getTypeAsString(ArgType);
+  if (i < ArgsList.size() - 1)
+OS << ", ";
+}
+
+OS << ") __LIBC_ATTRS;\n\n";
+  }
+
+  // Make another pass over entrypoints to emit object declarations.
+  for (const auto &Name : EntrypointNameList) {
+if (G.ObjectSpecMap.find(Name) == G.ObjectSpecMap.end())
+  continue;
+llvm::Record *ObjectSpec = G.ObjectSpecMap[Name];
+auto Type = ObjectSpec->getValueAsString("Type");
+OS << "extern " << Type << " " << Na

[PATCH] D154036: [libc] Add support for creating wrapper headers for offloading in clang

2023-06-28 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

For reference, here is what one of the newly generated headers looks like that 
is used.

  #ifndef __LLVM_LIBC_DECLARATIONS_STDIO_H
  #define __LLVM_LIBC_DECLARATIONS_STDIO_H
  
  #ifndef __LIBC_ATTRS
  #define __LIBC_ATTRS
  #endif
  
  #ifdef __cplusplus
  extern "C" {
  #endif
  
  int puts(const char *__restrict) __LIBC_ATTRS;
  
  int fputs(const char *__restrict, FILE *__restrict) __LIBC_ATTRS;
  
  extern FILE * stdin __LIBC_ATTRS;
  extern FILE * stdout __LIBC_ATTRS;
  extern FILE * stderr __LIBC_ATTRS;
  
  #ifdef __cplusplus
  }
  #endif

Unfortunately I have already run into a few problems with the re-declarations 
given the GNU `libc` headers. Here is the error message when including 
`string.h` now,

  
/home/jhuber/Documents/llvm/clang/lib/clang/17/include/llvm_libc_wrappers/llvm-libc-decls/string.h:54:8:
 error: 'strstr' is missing exception specification 'noexcept(true)'
 54 | char * strstr(const char *, const char *) __LIBC_ATTRS;
|^
  /usr/include/string.h:343:1: note: previous declaration is here
343 | strstr (const char *__haystack, const char *__needle) __THROW
| ^
  5 errors generated.

This occurs for `memchr`, `strchr`, `strrchr`, `strpbrk`, and `strstr`. If you
define `__LIBC_ATTRS` to `noexcept(true)` you get a different error,

  
/home/jhuber/Documents/llvm/clang/lib/clang/17/include/llvm_libc_wrappers/llvm-libc-decls/string.h:54:8:
 error: functions that differ only in their return type cannot be overloaded
 54 | char * strstr(const char *, const char *) __LIBC_ATTRS;
| ~~ ^
  /usr/include/string.h:343:1: note: previous definition is here
343 | __extern_always_inline const char *
|  ~~
344 | strstr (const char *__haystack, const char *__needle) __THROW
| ^

Looking at the definitions, they look like this in the GNU headers,

  extern char *strstr (char *__haystack, const char *__needle)
   __THROW __asm ("strstr") __attribute_pure__ __nonnull ((1, 2));
  extern const char *strstr (const char *__haystack, const char *__needle)
   __THROW __asm ("strstr") __attribute_pure__ __nonnull ((1, 2));

Does anyone have any suggestions on working around this? The other supported 
headers work as far as I can tell.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154036/new/

https://reviews.llvm.org/D154036

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D154036: [libc] Add support for creating wrapper headers for offloading in clang

2023-06-28 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, tianshilei1992, sivachandra, lntue, 
michaelrj, tra, JonChesterfield.
Herald added projects: libc-project, All.
Herald added a subscriber: libc-commits.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1, MaskRay.
Herald added a project: clang.

This is an alternate approach to the patches proposed in D153897 and
D153794. Rather than exporting a single header that can be included on
the GPU in all circumstances, this patch chooses to instead generate a
separate set of headers that only provides the declarations. This can
then be used by external tooling to set up what's on the GPU. This
leaves room for header hacks for offloading languages without needing to
worry about the `libc` implementation.

Currently this generates a set of headers that only contain the
declarations. These will then be installed to a new clang resource
directory called `llvm_libc_wrappers/` which will house the shim code.
We can then automatically include this from `clang` when offloading to
wrap around the headers while specifying what's on the GPU.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D154036

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/llvm_libc_wrappers/ctype.h
  clang/lib/Headers/llvm_libc_wrappers/llvm-libc-decls/README.txt
  clang/lib/Headers/llvm_libc_wrappers/stdio.h
  clang/lib/Headers/llvm_libc_wrappers/stdlib.h
  clang/lib/Headers/llvm_libc_wrappers/string.h
  clang/test/Driver/gpu-libc-headers.c
  libc/cmake/modules/LLVMLibCHeaderRules.cmake
  libc/include/CMakeLists.txt
  libc/utils/HdrGen/Generator.cpp
  libc/utils/HdrGen/Generator.h
  libc/utils/HdrGen/Main.cpp

Index: libc/utils/HdrGen/Main.cpp
===
--- libc/utils/HdrGen/Main.cpp
+++ libc/utils/HdrGen/Main.cpp
@@ -32,6 +32,9 @@
 llvm::cl::list ReplacementValues(
 "args", llvm::cl::desc("Command separated = pairs."),
 llvm::cl::value_desc("[,name=value]"));
+llvm::cl::opt ExportDecls(
+"export-decls",
+llvm::cl::desc("Output a new header containing only the entrypoints."));
 
 void ParseArgValuePairs(std::unordered_map &Map) {
   for (std::string &R : ReplacementValues) {
@@ -48,7 +51,10 @@
   std::unordered_map ArgMap;
   ParseArgValuePairs(ArgMap);
   Generator G(HeaderDefFile, EntrypointNamesOption, StandardHeader, ArgMap);
-  G.generate(OS, Records);
+  if (ExportDecls)
+G.generateDecls(OS, Records);
+  else
+G.generate(OS, Records);
 
   return false;
 }
Index: libc/utils/HdrGen/Generator.h
===
--- libc/utils/HdrGen/Generator.h
+++ libc/utils/HdrGen/Generator.h
@@ -52,6 +52,7 @@
 ArgMap(Map) {}
 
   void generate(llvm::raw_ostream &OS, llvm::RecordKeeper &Records);
+  void generateDecls(llvm::raw_ostream &OS, llvm::RecordKeeper &Records);
 };
 
 } // namespace llvm_libc
Index: libc/utils/HdrGen/Generator.cpp
===
--- libc/utils/HdrGen/Generator.cpp
+++ libc/utils/HdrGen/Generator.cpp
@@ -10,6 +10,7 @@
 
 #include "IncludeFileCommand.h"
 #include "PublicAPICommand.h"
+#include "utils/LibcTableGenUtil/APIIndexer.h"
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -116,4 +117,78 @@
   }
 }
 
+void Generator::generateDecls(llvm::raw_ostream &OS,
+  llvm::RecordKeeper &Records) {
+
+  OS << "//===-- C standard declarations for " << StdHeader << " "
+ << std::string(80 - (42 + StdHeader.size()), '-') << "===//\n"
+ << "//\n"
+ << "// Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+"Exceptions.\n"
+ << "// See https://llvm.org/LICENSE.txt for license information.\n"
+ << "// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ << "//\n"
+ << "//"
+"===---"
+"---===//\n\n";
+
+  std::string HeaderGuard(StdHeader.size(), '\0');
+  llvm::transform(StdHeader, HeaderGuard.begin(), [](const char C) -> char {
+return !isalnum(C) ? '_' : llvm::toUpper(C);
+  });
+  OS << "#ifndef __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n"
+ << "#define __LLVM_LIBC_DECLARATIONS_" << HeaderGuard << "\n\n";
+
+  OS << "#ifndef __LIBC_ATTRS\n"
+ << "#define __LIBC_ATTRS\n"
+ << "#endif\n\n";
+
+  OS << "#ifdef __cplusplus\n"
+ << "extern \"C\" {\n"
+ << "#endif\n\n";
+
+  APIIndexer G(StdHeader, Records);
+  for (auto &Name : EntrypointNameList) {
+// Filter out functions not exported by this header.
+if (G.FunctionSpecMap.find(Name) == G.FunctionSpecMap.end())
+  continue;
+
+llvm::Record *FunctionSpec = G.FunctionSpecMap[Name];
+llvm::Record *RetValSpec = Funct

[PATCH] D153909: [AMDGPU] Always pass `-mcpu` to the `lld` linker

2023-06-28 Thread Joseph Huber via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG765301183f58: [AMDGPU] Always pass `-mcpu` to the `lld` 
linker (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153909/new/

https://reviews.llvm.org/D153909

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,8 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck 
-check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck 
-check-prefixes=LTO,MCPU %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN:   -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
+// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -545,6 +545,9 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  else if (Args.hasArg(options::OPT_mcpu_EQ))
+CmdArgs.push_back(Args.MakeArgString(
+"-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ)));
   CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,8 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefixes=LTO,MCPU %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN:   -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
+// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -545,6 +545,9 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  else if (Args.hasArg(options::OPT_mcpu_EQ))
+CmdArgs.push_back(Args.MakeArgString(
+"-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ)));
   CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153667: [HIP]: Add gpu-link-output to control link job creation

2023-06-28 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D153667#4455943, @yaxunl wrote:

> For -fno-gpu-rdc case we do not use lto. Since -fno-gpu-rdc has one TU only, 
> we use the non-lto backend to get relocatable object, and use lld for 
> relocatable to shared object. This patch allows us to stop at the relocatable 
> object since comgr needs that.

I see, so conceptually this is like `-Xarch-device -c` (if such a thing worked)?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153667/new/

https://reviews.llvm.org/D153667

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153909: [AMDGPU] Always pass `-mcpu` to the `lld` linker

2023-06-27 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D153909#4454383, @JonChesterfield wrote:

> I thought lld did the right thing if you pass it raw IR, without specifying 
> an arch, but on reflection it might just be silently miscompiling things. The 
> test doesn't seem to involve a specific type of input file, does clang foo.ll 
> -o a.out pass a mcpu flag along?
>
> It's a bit weird that the architecture doesn't seem to be embedded in the IR 
> file (e.g. you have to pass it to llc) but that's out of scope here

It markedly does not work without it. I don't think there are currently any
facilities to define a module-wide architecture; there's only function-level
metadata.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153909/new/

https://reviews.llvm.org/D153909

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153909: [AMDGPU] Always pass `-mcpu` to the `lld` linker

2023-06-27 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, yaxunl, tra.
Herald added subscribers: kerbowa, tpr, dstuttard, jvesely, kzhuravl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay, wdng.
Herald added a project: clang.

Currently, AMDGPU more or less only supports linking with LTO. If the
user does not pass either `-flto` or `-Wl,-plugin-opt=mcpu=` manually,
linking will fail because the architectures aren't compatible. This
patch simply passes `-mcpu` by default if it was specified. It should be a
no-op if it's not actually used.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153909

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,8 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck 
-check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck 
-check-prefixes=LTO,MCPU %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN:   -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
+// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -545,6 +545,9 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  else if (Args.hasArg(options::OPT_mcpu_EQ))
+CmdArgs.push_back(Args.MakeArgString(
+"-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ)));
   CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,8 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefixes=LTO,MCPU %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN:   -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
+// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -545,6 +545,9 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  else if (Args.hasArg(options::OPT_mcpu_EQ))
+CmdArgs.push_back(Args.MakeArgString(
+"-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ)));
   CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153883: [Clang][OpenMP] Enable use of __kmpc_alloc_shared for VLAs defined in AMD GPU offloaded regions

2023-06-27 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp:1085
   }
-  for (const auto *VD : I->getSecond().EscapedVariableLengthDecls) {
-// Use actual memory size of the VLA object including the padding

doru1004 wrote:
> ABataev wrote:
> > Why this code is removed?
> I could not understand why this code is here in the first place since it 
> doesn't seem that it could ever work correctly (and it doesn't seem to be 
> covered by any existing tests). Maybe I'm wrong but that was my understanding 
> of it. So what seems to happen is that this code attempts to emit a 
> kmpc_alloc_shared before the actual size calculation is emitted. So if the 
> VLA size is something that the user defines such as `int N = 10;` then that 
> code will not have been emitted at this point. When the expression computing 
> the size of the VLA uses `N`, the code that is deleted here would just fail 
> to find the VLA size in the attempt to emit the kmpc_alloc_shared. The 
> emission of the VLA as kmpc_alloc_shared needs to happen after the expression 
> of the size is emitted.
I'm pretty sure I was the one that wrote this code, and at the time I don't 
recall it really working. I remember there was something else that expected 
this to be here, but for what utility I do not recall. VLAs were never tested 
or used.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153883/new/

https://reviews.llvm.org/D153883

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153883: [Clang][OpenMP] Enable use of __kmpc_alloc_shared for VLAs defined in AMD GPU offloaded regions

2023-06-27 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

So this is implementing the `stacksave` using `__kmpc_alloc_shared` instead? It 
makes sense since the OpenMP standard expects sharing for the stack. I wonder 
how this interfaces with `-fopenmp-cuda-mode`.




Comment at: clang/lib/CodeGen/CGDecl.cpp:1603
+// deallocation call of __kmpc_free_shared() is emitted later.
+if (getLangOpts().OpenMP && getTarget().getTriple().isAMDGCN()) {
+  // Emit call to __kmpc_alloc_shared() instead of the alloca.

Does NVPTX handle this already? If not, is there a compelling reason to exclude 
NVPTX? Otherwise we should check if we are the OpenMP device.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153883/new/

https://reviews.llvm.org/D153883

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153369: [OpenMP] Always apply target declarations to canonical definitions

2023-06-27 Thread Joseph Huber via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1d699bf2664d: [OpenMP] Always apply target declarations to 
canonical definitions (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D153369?vs=534950&id=534970#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153369/new/

https://reviews.llvm.org/D153369

Files:
  clang/lib/AST/AttrImpl.cpp
  clang/test/OpenMP/declare_target_codegen.cpp


Index: clang/test/OpenMP/declare_target_codegen.cpp
===
--- clang/test/OpenMP/declare_target_codegen.cpp
+++ clang/test/OpenMP/declare_target_codegen.cpp
@@ -27,11 +27,13 @@
 // CHECK-DAG: Bake
 // CHECK-NOT: @{{hhh|ggg|fff|eee}} =
 // CHECK-DAG: @flag = protected global i8 undef,
+// CHECK-DAG: @dx = {{protected | }}global i32 0,
+// CHECK-DAG: @dy = {{protected | }}global i32 0,
 // CHECK-DAG: @aaa = external global i32,
-// CHECK-DAG: @bbb ={{ protected | }}global i32 0,
+// CHECK-DAG: @bbb = {{protected | }}global i32 0,
 // CHECK-DAG: weak constant %struct.__tgt_offload_entry { ptr @bbb,
 // CHECK-DAG: @ccc = external global i32,
-// CHECK-DAG: @ddd ={{ protected | }}global i32 0,
+// CHECK-DAG: @ddd = {{protected | }}global i32 0,
 // CHECK-DAG: @hhh_decl_tgt_ref_ptr = weak global ptr null
 // CHECK-DAG: @ggg_decl_tgt_ref_ptr = weak global ptr null
 // CHECK-DAG: @fff_decl_tgt_ref_ptr = weak global ptr null
@@ -51,10 +53,21 @@
 // CHECK-DAG: define {{.*}}i32 @{{.*}}{{foo|bar|baz2|baz3|FA|f_method}}{{.*}}()
 // CHECK-DAG: define {{.*}}void @{{.*}}TemplateClass{{.*}}(ptr {{[^,]*}} 
%{{.*}})
 // CHECK-DAG: define {{.*}}i32 @{{.*}}TemplateClass{{.*}}f_method{{.*}}(ptr 
{{[^,]*}} %{{.*}})
-// CHECK-DAG: define {{.*}}void 
@__omp_offloading_{{.*}}_globals_l[[@LINE+78]]_ctor()
+// CHECK-DAG: define {{.*}}void 
@__omp_offloading_{{.*}}_globals_l[[@LINE+89]]_ctor()
 
 #ifndef HEADER
 #define HEADER
+
+int dx = 0;
+extern int dx;
+#pragma omp declare target to(dx)
+
+int dy = 0;
+#pragma omp begin declare target
+
+extern int dy;
+#pragma omp end declare target
+
 #pragma omp declare target
 bool flag [[clang::loader_uninitialized]];
 extern int bbb;
Index: clang/lib/AST/AttrImpl.cpp
===
--- clang/lib/AST/AttrImpl.cpp
+++ clang/lib/AST/AttrImpl.cpp
@@ -151,14 +151,16 @@
 
 std::optional
 OMPDeclareTargetDeclAttr::getActiveAttr(const ValueDecl *VD) {
-  if (!VD->hasAttrs())
+  if (llvm::all_of(VD->redecls(), [](const Decl *D) { return !D->hasAttrs(); 
}))
 return std::nullopt;
   unsigned Level = 0;
   OMPDeclareTargetDeclAttr *FoundAttr = nullptr;
-  for (auto *Attr : VD->specific_attrs()) {
-if (Level <= Attr->getLevel()) {
-  Level = Attr->getLevel();
-  FoundAttr = Attr;
+  for (const Decl *D : VD->redecls()) {
+for (auto *Attr : D->specific_attrs()) {
+  if (Level <= Attr->getLevel()) {
+Level = Attr->getLevel();
+FoundAttr = Attr;
+  }
 }
   }
   if (FoundAttr)


Index: clang/test/OpenMP/declare_target_codegen.cpp
===
--- clang/test/OpenMP/declare_target_codegen.cpp
+++ clang/test/OpenMP/declare_target_codegen.cpp
@@ -27,11 +27,13 @@
 // CHECK-DAG: Bake
 // CHECK-NOT: @{{hhh|ggg|fff|eee}} =
 // CHECK-DAG: @flag = protected global i8 undef,
+// CHECK-DAG: @dx = {{protected | }}global i32 0,
+// CHECK-DAG: @dy = {{protected | }}global i32 0,
 // CHECK-DAG: @aaa = external global i32,
-// CHECK-DAG: @bbb ={{ protected | }}global i32 0,
+// CHECK-DAG: @bbb = {{protected | }}global i32 0,
 // CHECK-DAG: weak constant %struct.__tgt_offload_entry { ptr @bbb,
 // CHECK-DAG: @ccc = external global i32,
-// CHECK-DAG: @ddd ={{ protected | }}global i32 0,
+// CHECK-DAG: @ddd = {{protected | }}global i32 0,
 // CHECK-DAG: @hhh_decl_tgt_ref_ptr = weak global ptr null
 // CHECK-DAG: @ggg_decl_tgt_ref_ptr = weak global ptr null
 // CHECK-DAG: @fff_decl_tgt_ref_ptr = weak global ptr null
@@ -51,10 +53,21 @@
 // CHECK-DAG: define {{.*}}i32 @{{.*}}{{foo|bar|baz2|baz3|FA|f_method}}{{.*}}()
 // CHECK-DAG: define {{.*}}void @{{.*}}TemplateClass{{.*}}(ptr {{[^,]*}} %{{.*}})
 // CHECK-DAG: define {{.*}}i32 @{{.*}}TemplateClass{{.*}}f_method{{.*}}(ptr {{[^,]*}} %{{.*}})
-// CHECK-DAG: define {{.*}}void @__omp_offloading_{{.*}}_globals_l[[@LINE+78]]_ctor()
+// CHECK-DAG: define {{.*}}void @__omp_offloading_{{.*}}_globals_l[[@LINE+89]]_ctor()
 
 #ifndef HEADER
 #define HEADER
+
+int dx = 0;
+extern int dx;
+#pragma omp declare target to(dx)
+
+int dy = 0;
+#pragma omp begin declare target
+
+extern int dy;
+#pragma omp end declare target
+
 #pragma omp declare target
 bool flag [[clang::loader_uninitialized]];
 extern int bbb;
Index: clang/lib/AST/AttrImpl.cpp
===
--- clang/lib/

[PATCH] D153369: [OpenMP] Always apply target declarations to canonical definitions

2023-06-27 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 534950.
jhuber6 added a comment.

Updating to use `VD->redecls()`.

Thanks for pointing that out, couldn't find it when I looked initially.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153369/new/

https://reviews.llvm.org/D153369

Files:
  clang/lib/AST/AttrImpl.cpp
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/declare_target_codegen.cpp


Index: clang/test/OpenMP/declare_target_codegen.cpp
===
--- clang/test/OpenMP/declare_target_codegen.cpp
+++ clang/test/OpenMP/declare_target_codegen.cpp
@@ -27,11 +27,13 @@
 // CHECK-DAG: Bake
 // CHECK-NOT: @{{hhh|ggg|fff|eee}} =
 // CHECK-DAG: @flag = protected global i8 undef,
+// CHECK-DAG: @dx = {{protected | }}global i32 0,
+// CHECK-DAG: @dy = {{protected | }}global i32 0,
 // CHECK-DAG: @aaa = external global i32,
-// CHECK-DAG: @bbb ={{ protected | }}global i32 0,
+// CHECK-DAG: @bbb = {{protected | }}global i32 0,
 // CHECK-DAG: weak constant %struct.__tgt_offload_entry { ptr @bbb,
 // CHECK-DAG: @ccc = external global i32,
-// CHECK-DAG: @ddd ={{ protected | }}global i32 0,
+// CHECK-DAG: @ddd = {{protected | }}global i32 0,
 // CHECK-DAG: @hhh_decl_tgt_ref_ptr = weak global ptr null
 // CHECK-DAG: @ggg_decl_tgt_ref_ptr = weak global ptr null
 // CHECK-DAG: @fff_decl_tgt_ref_ptr = weak global ptr null
@@ -51,10 +53,21 @@
 // CHECK-DAG: define {{.*}}i32 @{{.*}}{{foo|bar|baz2|baz3|FA|f_method}}{{.*}}()
 // CHECK-DAG: define {{.*}}void @{{.*}}TemplateClass{{.*}}(ptr {{[^,]*}} 
%{{.*}})
 // CHECK-DAG: define {{.*}}i32 @{{.*}}TemplateClass{{.*}}f_method{{.*}}(ptr 
{{[^,]*}} %{{.*}})
-// CHECK-DAG: define {{.*}}void 
@__omp_offloading_{{.*}}_globals_l[[@LINE+78]]_ctor()
+// CHECK-DAG: define {{.*}}void 
@__omp_offloading_{{.*}}_globals_l[[@LINE+89]]_ctor()
 
 #ifndef HEADER
 #define HEADER
+
+int dx = 0;
+extern int dx;
+#pragma omp declare target to(dx)
+
+int dy = 0;
+#pragma omp begin declare target
+
+extern int dy;
+#pragma omp end declare target
+
 #pragma omp declare target
 bool flag [[clang::loader_uninitialized]];
 extern int bbb;
Index: clang/lib/Sema/SemaOpenMP.cpp
===
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -22988,6 +22988,7 @@
   ND->addAttr(A);
   if (ASTMutationListener *ML = Context.getASTMutationListener())
 ML->DeclarationMarkedOpenMPDeclareTarget(ND, A);
+
   checkDeclIsAllowedInOpenMPTarget(nullptr, ND, Loc);
 }
 
Index: clang/lib/AST/AttrImpl.cpp
===
--- clang/lib/AST/AttrImpl.cpp
+++ clang/lib/AST/AttrImpl.cpp
@@ -151,14 +151,16 @@
 
 std::optional
 OMPDeclareTargetDeclAttr::getActiveAttr(const ValueDecl *VD) {
-  if (!VD->hasAttrs())
+  if (llvm::all_of(VD->redecls(), [](const Decl *D) { return !D->hasAttrs(); 
}))
 return std::nullopt;
   unsigned Level = 0;
   OMPDeclareTargetDeclAttr *FoundAttr = nullptr;
-  for (auto *Attr : VD->specific_attrs()) {
-if (Level <= Attr->getLevel()) {
-  Level = Attr->getLevel();
-  FoundAttr = Attr;
+  for (const Decl *D : VD->redecls()) {
+for (auto *Attr : D->specific_attrs()) {
+  if (Level <= Attr->getLevel()) {
+Level = Attr->getLevel();
+FoundAttr = Attr;
+  }
 }
   }
   if (FoundAttr)


Index: clang/test/OpenMP/declare_target_codegen.cpp
===
--- clang/test/OpenMP/declare_target_codegen.cpp
+++ clang/test/OpenMP/declare_target_codegen.cpp
@@ -27,11 +27,13 @@
 // CHECK-DAG: Bake
 // CHECK-NOT: @{{hhh|ggg|fff|eee}} =
 // CHECK-DAG: @flag = protected global i8 undef,
+// CHECK-DAG: @dx = {{protected | }}global i32 0,
+// CHECK-DAG: @dy = {{protected | }}global i32 0,
 // CHECK-DAG: @aaa = external global i32,
-// CHECK-DAG: @bbb ={{ protected | }}global i32 0,
+// CHECK-DAG: @bbb = {{protected | }}global i32 0,
 // CHECK-DAG: weak constant %struct.__tgt_offload_entry { ptr @bbb,
 // CHECK-DAG: @ccc = external global i32,
-// CHECK-DAG: @ddd ={{ protected | }}global i32 0,
+// CHECK-DAG: @ddd = {{protected | }}global i32 0,
 // CHECK-DAG: @hhh_decl_tgt_ref_ptr = weak global ptr null
 // CHECK-DAG: @ggg_decl_tgt_ref_ptr = weak global ptr null
 // CHECK-DAG: @fff_decl_tgt_ref_ptr = weak global ptr null
@@ -51,10 +53,21 @@
 // CHECK-DAG: define {{.*}}i32 @{{.*}}{{foo|bar|baz2|baz3|FA|f_method}}{{.*}}()
 // CHECK-DAG: define {{.*}}void @{{.*}}TemplateClass{{.*}}(ptr {{[^,]*}} %{{.*}})
 // CHECK-DAG: define {{.*}}i32 @{{.*}}TemplateClass{{.*}}f_method{{.*}}(ptr {{[^,]*}} %{{.*}})
-// CHECK-DAG: define {{.*}}void @__omp_offloading_{{.*}}_globals_l[[@LINE+78]]_ctor()
+// CHECK-DAG: define {{.*}}void @__omp_offloading_{{.*}}_globals_l[[@LINE+89]]_ctor()
 
 #ifndef HEADER
 #define HEADER
+
+int dx = 0;
+extern int dx;
+#pragma omp declare target to(dx)
+
+int dy = 0;
+#pragma omp begin d

[PATCH] D153369: [OpenMP] Always apply target declarations to canonical definitions

2023-06-27 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D153369#4451993 , @ABataev wrote:

> Did you try instead fix the OMPDeclareTargetDeclAttr::getActiveAttr() 
> function to make it look through all the declarations and return the 
> attribute from the first found instead of adding a new attribute?

I originally tried that but found that once we've found a canonical 
declaration, nothing will really bind to the new non-canonical definition. So 
the only way to do it would be to scan the entire file with the source manager 
as far as I could tell. I could be wrong though, I'm not as familiar with Clang 
here.




Comment at: clang/lib/Sema/SemaOpenMP.cpp:23105-23112
+// If this was not a canonical definition we need to update it as well.
+if (auto *CD = dyn_cast(D->getCanonicalDecl())) {
+  if (!CD->hasAttr()) {
+CD->addAttr(A);
+if (ASTMutationListener *ML = Context.getASTMutationListener())
+  ML->DeclarationMarkedOpenMPDeclareTarget(CD, A);
+  }

ABataev wrote:
> Here it would be better to reconstruct the attribute and make it implicit 
> attribute
I thought it was already implicit since we use 
`OMPDeclareTargetDeclAttr::CreateImplicit` above, what would be different in 
the new version?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153369/new/

https://reviews.llvm.org/D153369

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153667: [HIP]: Add gpu-link-output to control link job creation

2023-06-26 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D153667#4450705 , @jrbyrnes wrote:

> In D153667#4450517 , @jhuber6 wrote:
>
>> What's the difference here between this and the existing `--hip-link`?
>
> Hi @jhuber6
>
> The commit is poorly named, the main purpose is to introduce 
> `-no-gpu-link-output.`
>
> We want a way to produce relocatable from source. In terms of the Driver, 
> this means building actions and jobs for phases up to `phases::Assemble`. 
> `-no- gpu-link-output` does this by overriding BuildActions to stop after 
> `phases::Assemble` (similar to `-no-gpu-bundle-output`). `-gpu-link-output` 
> is NFCI. COMGR would be the client of this, and it would be up to COMGR to 
> handle linking of the relocatable.
>
> AFAICT, `-hip-link` allows for linking of offload-bundles, so it is 
> conceptually different. We can get (somewhat) close to what we want with 
> `-emit-llvm -hip-link`, but that is probably more due to `-emit-llvm`. 
> `-hip-link` by itself produces linker actions / jobs, which is what we are 
> trying to avoid here.

So, you run the backend and obtain a relocatable ELF, but do not link it via 
`lld`? If I'm understanding this correctly, that would be the difference 
between `-flto` and `-fno-lto`, or `-foffload-lto` and `-fno-offload-lto`, 
AMDGPU always having `-flto` on currently. Also I recall AMDGPU / HIP 
completely disabling the backend step at some point, so it only emits LLVM-IR.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153667/new/

https://reviews.llvm.org/D153667

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153667: [HIP]: Add gpu-link-output to control link job creation

2023-06-26 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

What's the difference here between this and the existing `--hip-link`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153667/new/

https://reviews.llvm.org/D153667

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153369: [OpenMP] Always apply target declarations to canonical definitions

2023-06-26 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

ping


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153369/new/

https://reviews.llvm.org/D153369

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153568: [ClangPackager] Add an option to extract inputs to an archive

2023-06-23 Thread Joseph Huber via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG869baa912573: [ClangPackager] Add an option to extract 
inputs to an archive (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153568/new/

https://reviews.llvm.org/D153568

Files:
  clang/test/Driver/offload-packager.c
  clang/tools/clang-offload-packager/ClangOffloadPackager.cpp

Index: clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
===
--- clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
+++ clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
@@ -15,6 +15,7 @@
 #include "clang/Basic/Version.h"
 
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/ArchiveWriter.h"
 #include "llvm/Object/OffloadBinary.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileOutputBuffer.h"
@@ -49,6 +50,14 @@
  cl::value_desc("=,..."),
  cl::cat(ClangOffloadPackagerCategory));
 
+static cl::opt
+CreateArchive("archive",
+  cl::desc("Write extracted files to a static archive"),
+  cl::cat(ClangOffloadPackagerCategory));
+
+/// Path of the current binary.
+static const char *PackagerExecutable;
+
 static void PrintVersion(raw_ostream &OS) {
   OS << clang::getClangToolFullVersion("clang-offload-packager") << '\n';
 }
@@ -69,6 +78,18 @@
   return Args;
 }
 
+static Error writeFile(StringRef Filename, StringRef Data) {
+  Expected> OutputOrErr =
+  FileOutputBuffer::create(Filename, Data.size());
+  if (!OutputOrErr)
+return OutputOrErr.takeError();
+  std::unique_ptr Output = std::move(*OutputOrErr);
+  llvm::copy(Data, Output->getBufferStart());
+  if (Error E = Output->commit())
+return E;
+  return Error::success();
+}
+
 static Error bundleImages() {
   SmallVector BinaryData;
   raw_svector_ostream OS(BinaryData);
@@ -111,13 +132,8 @@
 OS << Buffer->getBuffer();
   }
 
-  Expected> OutputOrErr =
-  FileOutputBuffer::create(OutputFile, BinaryData.size());
-  if (!OutputOrErr)
-return OutputOrErr.takeError();
-  std::unique_ptr Output = std::move(*OutputOrErr);
-  std::copy(BinaryData.begin(), BinaryData.end(), Output->getBufferStart());
-  if (Error E = Output->commit())
+  if (Error E = writeFile(OutputFile,
+  StringRef(BinaryData.begin(), BinaryData.size(
 return E;
   return Error::success();
 }
@@ -145,8 +161,9 @@
 StringSaver Saver(Alloc);
 auto Args = getImageArguments(Image, Saver);
 
-for (uint64_t I = 0, E = Binaries.size(); I != E; ++I) {
-  const auto *Binary = Binaries[I].getBinary();
+SmallVector Extracted;
+for (const OffloadFile &File : Binaries) {
+  const auto *Binary = File.getBinary();
   // We handle the 'file' and 'kind' identifiers differently.
   bool Match = llvm::all_of(Args, [&](auto &Arg) {
 const auto [Key, Value] = Arg;
@@ -156,27 +173,45 @@
   return Binary->getOffloadKind() == getOffloadKind(Value);
 return Binary->getString(Key) == Value;
   });
-  if (!Match)
-continue;
-
-  // If the user did not provide a filename derive one from the input and
-  // image.
-  StringRef Filename =
-  !Args.count("file")
-  ? Saver.save(sys::path::stem(InputFile) + "-" +
-   Binary->getTriple() + "-" + Binary->getArch() + "." +
-   std::to_string(I) + "." +
-   getImageKindName(Binary->getImageKind()))
-  : Args["file"];
-
-  Expected> OutputOrErr =
-  FileOutputBuffer::create(Filename, Binary->getImage().size());
-  if (!OutputOrErr)
-return OutputOrErr.takeError();
-  std::unique_ptr Output = std::move(*OutputOrErr);
-  llvm::copy(Binary->getImage(), Output->getBufferStart());
-  if (Error E = Output->commit())
+  if (Match)
+Extracted.push_back(Binary);
+}
+
+if (Extracted.empty())
+  continue;
+
+if (CreateArchive) {
+  if (!Args.count("file"))
+return createStringError(inconvertibleErrorCode(),
+ "Image must have a 'file' argument.");
+
+  SmallVector Members;
+  for (const OffloadBinary *Binary : Extracted)
+Members.emplace_back(MemoryBufferRef(
+Binary->getImage(),
+Binary->getMemoryBufferRef().getBufferIdentifier()));
+
+  if (Error E = writeArchive(Args["file"], Members, true,
+ Archive::getDefaultKindForHost(), true, false,
+ nullptr))
 return E;
+} else if (Args.count("file")) {
+  if (Extracted.size() > 1)
+WithColor::warning(errs(), PackagerExecutable)
+<< "Multiple inputs match to a single file, '" << Args["file"]
+<< "'\n";
+  if (Error E = writ

[PATCH] D153568: [ClangPackager] Add an option to extract inputs to an archive

2023-06-23 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D153568#4445133 , @JonChesterfield 
wrote:

> LGTM. As a meta level comment, we have far too many of these binary munging 
> sorts of tools.

Definitely agree. This is supposed to be a direct replacement for 
`clang-offload-bundler`, but that is still in use by HIP and I don't know if 
it's something we could remove in a reasonable amount of time.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153568/new/

https://reviews.llvm.org/D153568

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153578: [Clang] Disable `libc` headers for offloading languages

2023-06-22 Thread Joseph Huber via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG767852b1a8e3: [Clang] Disable `libc` headers for offloading 
languages (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153578/new/

https://reviews.llvm.org/D153578

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/gpu-libc-headers.c


Index: clang/test/Driver/gpu-libc-headers.c
===
--- clang/test/Driver/gpu-libc-headers.c
+++ clang/test/Driver/gpu-libc-headers.c
@@ -1,18 +1,6 @@
 // REQUIRES: nvptx-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
--sysroot=./ \
-// RUN: -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
-// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
--sysroot=./ \
-// RUN: -fopenmp-targets=nvptx64-nvidia-cuda 
-Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
-// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib 
--sysroot=./ %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib 
--sysroot=./ %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
-// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" 
"{{.*}}include{{.*}}gpu-none-llvm"{{.*}}"-isysroot" "./"
-
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
 // RUN: -nogpuinc %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-HEADERS-DISABLED
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1181,16 +1181,18 @@
 
   // If we are compiling for a GPU target we want to override the system 
headers
   // with ones created by the 'libc' project if present.
+  // FIXME: We need to find a way to make these headers compatible with the
+  // host environment so they can be included from offloading languages. For 
now
+  // these are only active when targeting the GPU with cross-compilation.
   if (!Args.hasArg(options::OPT_nostdinc) &&
   !Args.hasArg(options::OPT_nogpuinc) &&
   !Args.hasArg(options::OPT_nobuiltininc) &&
+  C.getActiveOffloadKinds() == Action::OFK_None &&
   (getToolChain().getTriple().isNVPTX() ||
getToolChain().getTriple().isAMDGCN())) {
 
   // Add include/gpu-none-libc/* to our system include path. This lets us 
use
   // GPU-specific system headers first. 
-  // TODO: We need to find a way to make these headers compatible with the
-  // host environment.
   SmallString<128> P(llvm::sys::path::parent_path(D.InstalledDir));
   llvm::sys::path::append(P, "include");
   llvm::sys::path::append(P, "gpu-none-llvm");


Index: clang/test/Driver/gpu-libc-headers.c
===
--- clang/test/Driver/gpu-libc-headers.c
+++ clang/test/Driver/gpu-libc-headers.c
@@ -1,18 +1,6 @@
 // REQUIRES: nvptx-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
-// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
-// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
-// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
-// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib --sysroot=./ %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib --sysroot=./ %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
-// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" "{{.*}}include{{.*}}gpu-none-llvm"{{.*}}"-isysroot" "./"
-
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
 // RUN: -nogpuinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1181,16 +1181,18 @@
 
   // If we are compiling

[PATCH] D153578: [Clang] Disable `libc` headers for offloading languages

2023-06-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, jdoerfert, tra, yaxunl.
Herald added a subscriber: Anastasia.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1, MaskRay.
Herald added a project: clang.

These headers are currently broken when included from the offloading
languages like OpenMP, OpenCL, CUDA, and HIP. Turn this logic off so we
can compile these languages when the GPU libc is installed. I am
currently trying to remedy this and have made an RFC for it in libc,
see 
https://discourse.llvm.org/t/rfc-implementing-gpu-headers-in-the-llvm-c-library/71523.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153578

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/gpu-libc-headers.c


Index: clang/test/Driver/gpu-libc-headers.c
===
--- clang/test/Driver/gpu-libc-headers.c
+++ clang/test/Driver/gpu-libc-headers.c
@@ -1,18 +1,6 @@
 // REQUIRES: nvptx-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
--sysroot=./ \
-// RUN: -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
-// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
--sysroot=./ \
-// RUN: -fopenmp-targets=nvptx64-nvidia-cuda 
-Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
-// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib 
--sysroot=./ %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib 
--sysroot=./ %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
-// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" 
"{{.*}}include{{.*}}gpu-none-llvm"{{.*}}"-isysroot" "./"
-
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
 // RUN: -nogpuinc %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-HEADERS-DISABLED
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1181,16 +1181,18 @@
 
   // If we are compiling for a GPU target we want to override the system 
headers
   // with ones created by the 'libc' project if present.
+  // FIXME: We need to find a way to make these headers compatible with the
+  // host environment so they can be included from offloading languages. For 
now
+  // these are only active when targeting the GPU with cross-compilation.
   if (!Args.hasArg(options::OPT_nostdinc) &&
   !Args.hasArg(options::OPT_nogpuinc) &&
   !Args.hasArg(options::OPT_nobuiltininc) &&
+  C.getActiveOffloadKinds() == Action::OFK_None &&
   (getToolChain().getTriple().isNVPTX() ||
getToolChain().getTriple().isAMDGCN())) {
 
   // Add include/gpu-none-libc/* to our system include path. This lets us 
use
   // GPU-specific system headers first. 
-  // TODO: We need to find a way to make these headers compatible with the
-  // host environment.
   SmallString<128> P(llvm::sys::path::parent_path(D.InstalledDir));
   llvm::sys::path::append(P, "include");
   llvm::sys::path::append(P, "gpu-none-llvm");


Index: clang/test/Driver/gpu-libc-headers.c
===
--- clang/test/Driver/gpu-libc-headers.c
+++ clang/test/Driver/gpu-libc-headers.c
@@ -1,18 +1,6 @@
 // REQUIRES: nvptx-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
-// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
-// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
-// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
-// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib --sysroot=./ %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
-// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib --sysroot=./ %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
-// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" "{{.*}}include{{.*}}gpu-none-llvm"{{.*}}"-isysroot" "./"
-
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
 // RUN: -nogpuinc %s 2>&1 | FileCheck %s

[PATCH] D153568: [ClangPackager] Add an option to extract inputs to an archive

2023-06-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, tra, yaxunl, jdoerfert, 
tianshilei1992, ronlieb, gregrodgers.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1.
Herald added a project: clang.

Currently we simply overwrite the output file if we get multiple matches
in the fatbinary. This patch introduces the `--archive` option which
allows us to combine all of the files into a static archive instead.
This is useful for creating a device-specific static archive library
from a fatbinary.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153568

Files:
  clang/test/Driver/offload-packager.c
  clang/tools/clang-offload-packager/ClangOffloadPackager.cpp

Index: clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
===
--- clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
+++ clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
@@ -15,6 +15,7 @@
 #include "clang/Basic/Version.h"
 
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/ArchiveWriter.h"
 #include "llvm/Object/OffloadBinary.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileOutputBuffer.h"
@@ -49,6 +50,14 @@
  cl::value_desc("=,..."),
  cl::cat(ClangOffloadPackagerCategory));
 
+static cl::opt
+CreateArchive("archive",
+  cl::desc("Write extracted files to a static archive"),
+  cl::cat(ClangOffloadPackagerCategory));
+
+/// Path of the current binary.
+static const char *PackagerExecutable;
+
 static void PrintVersion(raw_ostream &OS) {
   OS << clang::getClangToolFullVersion("clang-offload-packager") << '\n';
 }
@@ -69,6 +78,18 @@
   return Args;
 }
 
+static Error writeFile(StringRef Filename, StringRef Data) {
+  Expected> OutputOrErr =
+  FileOutputBuffer::create(Filename, Data.size());
+  if (!OutputOrErr)
+return OutputOrErr.takeError();
+  std::unique_ptr Output = std::move(*OutputOrErr);
+  llvm::copy(Data, Output->getBufferStart());
+  if (Error E = Output->commit())
+return E;
+  return Error::success();
+}
+
 static Error bundleImages() {
   SmallVector BinaryData;
   raw_svector_ostream OS(BinaryData);
@@ -111,13 +132,8 @@
 OS << Buffer->getBuffer();
   }
 
-  Expected> OutputOrErr =
-  FileOutputBuffer::create(OutputFile, BinaryData.size());
-  if (!OutputOrErr)
-return OutputOrErr.takeError();
-  std::unique_ptr Output = std::move(*OutputOrErr);
-  std::copy(BinaryData.begin(), BinaryData.end(), Output->getBufferStart());
-  if (Error E = Output->commit())
+  if (Error E = writeFile(OutputFile,
+  StringRef(BinaryData.begin(), BinaryData.size(
 return E;
   return Error::success();
 }
@@ -145,8 +161,9 @@
 StringSaver Saver(Alloc);
 auto Args = getImageArguments(Image, Saver);
 
-for (uint64_t I = 0, E = Binaries.size(); I != E; ++I) {
-  const auto *Binary = Binaries[I].getBinary();
+SmallVector Extracted;
+for (const OffloadFile &File : Binaries) {
+  const auto *Binary = File.getBinary();
   // We handle the 'file' and 'kind' identifiers differently.
   bool Match = llvm::all_of(Args, [&](auto &Arg) {
 const auto [Key, Value] = Arg;
@@ -156,27 +173,45 @@
   return Binary->getOffloadKind() == getOffloadKind(Value);
 return Binary->getString(Key) == Value;
   });
-  if (!Match)
-continue;
-
-  // If the user did not provide a filename derive one from the input and
-  // image.
-  StringRef Filename =
-  !Args.count("file")
-  ? Saver.save(sys::path::stem(InputFile) + "-" +
-   Binary->getTriple() + "-" + Binary->getArch() + "." +
-   std::to_string(I) + "." +
-   getImageKindName(Binary->getImageKind()))
-  : Args["file"];
-
-  Expected> OutputOrErr =
-  FileOutputBuffer::create(Filename, Binary->getImage().size());
-  if (!OutputOrErr)
-return OutputOrErr.takeError();
-  std::unique_ptr Output = std::move(*OutputOrErr);
-  llvm::copy(Binary->getImage(), Output->getBufferStart());
-  if (Error E = Output->commit())
+  if (Match)
+Extracted.push_back(Binary);
+}
+
+if (Extracted.empty())
+  continue;
+
+if (CreateArchive) {
+  if (!Args.count("file"))
+return createStringError(inconvertibleErrorCode(),
+ "Image must have a 'file' argument.");
+
+  SmallVector Members;
+  for (const OffloadBinary *Binary : Extracted)
+Members.emplace_back(MemoryBufferRef(
+Binary->getImage(),
+Binary->getMemoryBufferRef().getBufferIdentifier()));
+
+  if (Error E = writeArchive(Args["file"], Members, true,
+ Archive::getDefaultKindFo

[PATCH] D153369: [OpenMP] Always apply target declarations to canonical definitions

2023-06-20 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 533002.
jhuber6 added a comment.

Fix logic


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153369/new/

https://reviews.llvm.org/D153369

Files:
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/AST/dump.cpp


Index: clang/test/AST/dump.cpp
===
--- clang/test/AST/dump.cpp
+++ clang/test/AST/dump.cpp
@@ -79,11 +79,32 @@
 }
 #pragma omp end declare target
 
-// CHECK:   `-FunctionDecl {{.+}}  
line:[[@LINE-6]]:5 bar 'int ()'
-// CHECK-NEXT:  |-CompoundStmt {{.+}} 
-// CHECK-NEXT:  | |-DeclStmt {{.+}} 
-// CHECK-NEXT:  | | `-VarDecl {{.+}}  col:7 used f 'int'
-// CHECK-NEXT:  | `-ReturnStmt {{.+}} 
-// CHECK-NEXT:  |   `-ImplicitCastExpr {{.+}}  'int' 
-// CHECK-NEXT:  | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 
'f' 'int'
-// CHECK-NEXT:  `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To 
DT_Any 1
+// CHECK:  |-FunctionDecl {{.+}}  
line:[[@LINE-6]]:5 bar 'int ()'
+// CHECK-NEXT: | |-CompoundStmt {{.+}} 
+// CHECK-NEXT: | | |-DeclStmt {{.+}} 
+// CHECK-NEXT: | | | `-VarDecl {{.+}}  col:7 used f 'int'
+// CHECK-NEXT: | | `-ReturnStmt {{.+}} 
+// CHECK-NEXT: | |   `-ImplicitCastExpr {{.+}}  'int' 
+// CHECK-NEXT: | | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 
'f' 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
+
+int dx;
+
+extern int dx;
+#pragma omp declare target to(dx)
+
+// CHECK:  |-VarDecl {{.+}}  col:5 dx 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 4294967295
+// CHECK:  |-VarDecl {{.+}} prev {{.+}}  
col:12 dx 'int' extern
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 4294967295
+
+int dy;
+
+#pragma omp begin declare target
+extern int dy;
+#pragma omp end declare target
+
+// CHECK:  |-VarDecl {{.+}}  col:5 dy 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
+// CHECK:  `-VarDecl {{.+}} prev {{.+}}  
col:12 dy 'int' extern
+// CHECK-NEXT:   `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
Index: clang/lib/Sema/SemaOpenMP.cpp
===
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -22988,6 +22988,15 @@
   ND->addAttr(A);
   if (ASTMutationListener *ML = Context.getASTMutationListener())
 ML->DeclarationMarkedOpenMPDeclareTarget(ND, A);
+
+  // If this was not a canonical definition we need to update it as well.
+  if (auto *CD = dyn_cast(ND->getCanonicalDecl())) {
+if (!CD->hasAttr()) {
+  CD->addAttr(A);
+  if (ASTMutationListener *ML = Context.getASTMutationListener())
+ML->DeclarationMarkedOpenMPDeclareTarget(CD, A);
+}
+  }
   checkDeclIsAllowedInOpenMPTarget(nullptr, ND, Loc);
 }
 
@@ -23092,6 +23101,15 @@
 D->addAttr(A);
 if (ASTMutationListener *ML = Context.getASTMutationListener())
   ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
+
+// If this was not a canonical definition we need to update it as well.
+if (auto *CD = dyn_cast(D->getCanonicalDecl())) {
+  if (!CD->hasAttr()) {
+CD->addAttr(A);
+if (ASTMutationListener *ML = Context.getASTMutationListener())
+  ML->DeclarationMarkedOpenMPDeclareTarget(CD, A);
+  }
+}
   }
   return;
 }


Index: clang/test/AST/dump.cpp
===
--- clang/test/AST/dump.cpp
+++ clang/test/AST/dump.cpp
@@ -79,11 +79,32 @@
 }
 #pragma omp end declare target
 
-// CHECK:   `-FunctionDecl {{.+}}  line:[[@LINE-6]]:5 bar 'int ()'
-// CHECK-NEXT:  |-CompoundStmt {{.+}} 
-// CHECK-NEXT:  | |-DeclStmt {{.+}} 
-// CHECK-NEXT:  | | `-VarDecl {{.+}}  col:7 used f 'int'
-// CHECK-NEXT:  | `-ReturnStmt {{.+}} 
-// CHECK-NEXT:  |   `-ImplicitCastExpr {{.+}}  'int' 
-// CHECK-NEXT:  | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 'f' 'int'
-// CHECK-NEXT:  `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 1
+// CHECK:  |-FunctionDecl {{.+}}  line:[[@LINE-6]]:5 bar 'int ()'
+// CHECK-NEXT: | |-CompoundStmt {{.+}} 
+// CHECK-NEXT: | | |-DeclStmt {{.+}} 
+// CHECK-NEXT: | | | `-VarDecl {{.+}}  col:7 used f 'int'
+// CHECK-NEXT: | | `-ReturnStmt {{.+}} 
+// CHECK-NEXT: | |   `-ImplicitCastExpr {{.+}}  'int' 
+// CHECK-NEXT: | | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 'f' 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 1
+
+int dx;
+
+extern int dx;
+#pragma omp declare target to(dx)
+
+// CHECK:  |-VarDecl {{.+}}  col:5 dx 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 4294967295
+// CHECK:  |-VarDecl {{.+}} prev {{.+}}  col:12 dx 'int' extern
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 4294967295
+
+int dy;
+
+#pragma omp begin declare targ

[PATCH] D153369: [OpenMP] Always apply target declarations to canonical definitions

2023-06-20 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 533000.
jhuber6 added a comment.

Adding AST mutation listener to the other modified declaration to signal that 
it was changed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153369/new/

https://reviews.llvm.org/D153369

Files:
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/AST/dump.cpp


Index: clang/test/AST/dump.cpp
===
--- clang/test/AST/dump.cpp
+++ clang/test/AST/dump.cpp
@@ -79,11 +79,32 @@
 }
 #pragma omp end declare target
 
-// CHECK:   `-FunctionDecl {{.+}}  
line:[[@LINE-6]]:5 bar 'int ()'
-// CHECK-NEXT:  |-CompoundStmt {{.+}} 
-// CHECK-NEXT:  | |-DeclStmt {{.+}} 
-// CHECK-NEXT:  | | `-VarDecl {{.+}}  col:7 used f 'int'
-// CHECK-NEXT:  | `-ReturnStmt {{.+}} 
-// CHECK-NEXT:  |   `-ImplicitCastExpr {{.+}}  'int' 
-// CHECK-NEXT:  | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 
'f' 'int'
-// CHECK-NEXT:  `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To 
DT_Any 1
+// CHECK:  |-FunctionDecl {{.+}}  
line:[[@LINE-6]]:5 bar 'int ()'
+// CHECK-NEXT: | |-CompoundStmt {{.+}} 
+// CHECK-NEXT: | | |-DeclStmt {{.+}} 
+// CHECK-NEXT: | | | `-VarDecl {{.+}}  col:7 used f 'int'
+// CHECK-NEXT: | | `-ReturnStmt {{.+}} 
+// CHECK-NEXT: | |   `-ImplicitCastExpr {{.+}}  'int' 
+// CHECK-NEXT: | | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 
'f' 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
+
+int dx;
+
+extern int dx;
+#pragma omp declare target to(dx)
+
+// CHECK:  |-VarDecl {{.+}}  col:5 dx 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 4294967295
+// CHECK:  |-VarDecl {{.+}} prev {{.+}}  
col:12 dx 'int' extern
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 4294967295
+
+int dy;
+
+#pragma omp begin declare target
+extern int dy;
+#pragma omp end declare target
+
+// CHECK:  |-VarDecl {{.+}}  col:5 dy 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
+// CHECK:  `-VarDecl {{.+}} prev {{.+}}  
col:12 dy 'int' extern
+// CHECK-NEXT:   `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
Index: clang/lib/Sema/SemaOpenMP.cpp
===
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -22986,6 +22986,9 @@
   Context, MT, DTCI.DT, IndirectE, IsIndirect, Level,
   SourceRange(Loc, Loc));
   ND->addAttr(A);
+  if (auto *CD = dyn_cast(ND->getCanonicalDecl()))
+if (!CD->hasAttr())
+  CD->addAttr(A);
   if (ASTMutationListener *ML = Context.getASTMutationListener())
 ML->DeclarationMarkedOpenMPDeclareTarget(ND, A);
   checkDeclIsAllowedInOpenMPTarget(nullptr, ND, Loc);
@@ -23090,8 +23093,15 @@
 DTCI.DT, IndirectE, IsIndirect, Level,
 SourceRange(DTCI.Loc, DTCI.Loc));
 D->addAttr(A);
-if (ASTMutationListener *ML = Context.getASTMutationListener())
+if (auto *CD = dyn_cast(D->getCanonicalDecl()))
+  if (!CD->hasAttr())
+CD->addAttr(A);
+if (ASTMutationListener *ML = Context.getASTMutationListener()) {
   ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
+  if (auto *CD = dyn_cast(D->getCanonicalDecl()))
+if (!CD->hasAttr())
+  ML->DeclarationMarkedOpenMPDeclareTarget(CD, A);
+}
   }
   return;
 }


Index: clang/test/AST/dump.cpp
===
--- clang/test/AST/dump.cpp
+++ clang/test/AST/dump.cpp
@@ -79,11 +79,32 @@
 }
 #pragma omp end declare target
 
-// CHECK:   `-FunctionDecl {{.+}}  line:[[@LINE-6]]:5 bar 'int ()'
-// CHECK-NEXT:  |-CompoundStmt {{.+}} 
-// CHECK-NEXT:  | |-DeclStmt {{.+}} 
-// CHECK-NEXT:  | | `-VarDecl {{.+}}  col:7 used f 'int'
-// CHECK-NEXT:  | `-ReturnStmt {{.+}} 
-// CHECK-NEXT:  |   `-ImplicitCastExpr {{.+}}  'int' 
-// CHECK-NEXT:  | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 'f' 'int'
-// CHECK-NEXT:  `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 1
+// CHECK:  |-FunctionDecl {{.+}}  line:[[@LINE-6]]:5 bar 'int ()'
+// CHECK-NEXT: | |-CompoundStmt {{.+}} 
+// CHECK-NEXT: | | |-DeclStmt {{.+}} 
+// CHECK-NEXT: | | | `-VarDecl {{.+}}  col:7 used f 'int'
+// CHECK-NEXT: | | `-ReturnStmt {{.+}} 
+// CHECK-NEXT: | |   `-ImplicitCastExpr {{.+}}  'int' 
+// CHECK-NEXT: | | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 'f' 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 1
+
+int dx;
+
+extern int dx;
+#pragma omp declare target to(dx)
+
+// CHECK:  |-VarDecl {{.+}}  col:5 dx 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 4294967295
+// CHECK:  |-VarDecl {{.+}} prev {{.+}}  col:12 dx 'int' extern
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 4294967295
+
+int d

[PATCH] D153369: [OpenMP] Always apply target declarations to canonical definitions

2023-06-20 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, tianshilei1992, JonChesterfield, tra, 
ABataev, carlo.bertolli.
Herald added subscribers: sunshaoce, guansong.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1.
Herald added a project: clang.

This patch changes the handling of OpenMP to add the device attributes
to the canonical definitions when we encounter a non-canonical
definition. Previously, the following code would not work because it
would find the non-canonical definition first which would then not be
used anywhere else.

  int x;
  extern int x;

This patch now adds the attribute to both of them. This allows us to
perform the following operation if, for example, there were an
implementation of `stderr` on the device.

  #include <stdio.h>
  
  // List of libc symbols supported on the device.
  extern FILE *stderr;

Unfortunately I cannot think of an equivalent solution to HIP / CUDA
device declarations as those are done with simple attributes. Attributes
themselves cannot be used to affect a definition once its canonical
definition has already been seen. Some help on that front would be
appreciated.

Fixes https://github.com/llvm/llvm-project/issues/63355


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153369

Files:
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/AST/dump.cpp


Index: clang/test/AST/dump.cpp
===
--- clang/test/AST/dump.cpp
+++ clang/test/AST/dump.cpp
@@ -79,11 +79,32 @@
 }
 #pragma omp end declare target
 
-// CHECK:   `-FunctionDecl {{.+}}  
line:[[@LINE-6]]:5 bar 'int ()'
-// CHECK-NEXT:  |-CompoundStmt {{.+}} 
-// CHECK-NEXT:  | |-DeclStmt {{.+}} 
-// CHECK-NEXT:  | | `-VarDecl {{.+}}  col:7 used f 'int'
-// CHECK-NEXT:  | `-ReturnStmt {{.+}} 
-// CHECK-NEXT:  |   `-ImplicitCastExpr {{.+}}  'int' 
-// CHECK-NEXT:  | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 
'f' 'int'
-// CHECK-NEXT:  `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To 
DT_Any 1
+// CHECK:  |-FunctionDecl {{.+}}  
line:[[@LINE-6]]:5 bar 'int ()'
+// CHECK-NEXT: | |-CompoundStmt {{.+}} 
+// CHECK-NEXT: | | |-DeclStmt {{.+}} 
+// CHECK-NEXT: | | | `-VarDecl {{.+}}  col:7 used f 'int'
+// CHECK-NEXT: | | `-ReturnStmt {{.+}} 
+// CHECK-NEXT: | |   `-ImplicitCastExpr {{.+}}  'int' 
+// CHECK-NEXT: | | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 
'f' 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
+
+int dx;
+
+extern int dx;
+#pragma omp declare target to(dx)
+
+// CHECK:  |-VarDecl {{.+}}  col:5 dx 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 4294967295
+// CHECK:  |-VarDecl {{.+}} prev {{.+}}  
col:12 dx 'int' extern
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 4294967295
+
+int dy;
+
+#pragma omp begin declare target
+extern int dy;
+#pragma omp end declare target
+
+// CHECK:  |-VarDecl {{.+}}  col:5 dy 'int'
+// CHECK-NEXT: | `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
+// CHECK:  `-VarDecl {{.+}} prev {{.+}}  
col:12 dy 'int' extern
+// CHECK-NEXT:   `-OMPDeclareTargetDeclAttr {{.+}}  
Implicit MT_To DT_Any 1
Index: clang/lib/Sema/SemaOpenMP.cpp
===
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -22986,6 +22986,9 @@
   Context, MT, DTCI.DT, IndirectE, IsIndirect, Level,
   SourceRange(Loc, Loc));
   ND->addAttr(A);
+  if (auto *CD = dyn_cast(ND->getCanonicalDecl()))
+if (!CD->hasAttr())
+  CD->addAttr(A);
   if (ASTMutationListener *ML = Context.getASTMutationListener())
 ML->DeclarationMarkedOpenMPDeclareTarget(ND, A);
   checkDeclIsAllowedInOpenMPTarget(nullptr, ND, Loc);
@@ -23090,6 +23093,9 @@
 DTCI.DT, IndirectE, IsIndirect, Level,
 SourceRange(DTCI.Loc, DTCI.Loc));
 D->addAttr(A);
+if (auto *CD = dyn_cast(D->getCanonicalDecl()))
+  if (!CD->hasAttr())
+CD->addAttr(A);
 if (ASTMutationListener *ML = Context.getASTMutationListener())
   ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
   }


Index: clang/test/AST/dump.cpp
===
--- clang/test/AST/dump.cpp
+++ clang/test/AST/dump.cpp
@@ -79,11 +79,32 @@
 }
 #pragma omp end declare target
 
-// CHECK:   `-FunctionDecl {{.+}}  line:[[@LINE-6]]:5 bar 'int ()'
-// CHECK-NEXT:  |-CompoundStmt {{.+}} 
-// CHECK-NEXT:  | |-DeclStmt {{.+}} 
-// CHECK-NEXT:  | | `-VarDecl {{.+}}  col:7 used f 'int'
-// CHECK-NEXT:  | `-ReturnStmt {{.+}} 
-// CHECK-NEXT:  |   `-ImplicitCastExpr {{.+}}  'int' 
-// CHECK-NEXT:  | `-DeclRefExpr {{.+}}  'int' lvalue Var {{.+}} 'f' 'int'
-// CHECK-NEXT:  `-OMPDeclareTargetDeclAttr {{.+}}  Implicit MT_To DT_Any 1
+// CHECK:  |-FunctionDecl {{.+}}  line:[[@LINE-6]]:

[PATCH] D152442: [LinkerWrapper] Support linking vendor bitcode late

2023-06-20 Thread Joseph Huber via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rGefacdfc235e3: [LinkerWrapper] Support linking vendor bitcode 
late (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152442/new/

https://reviews.llvm.org/D152442

Files:
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td


Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -25,9 +25,16 @@
 def bitcode_library_EQ : Joined<["--"], "bitcode-library=">,
   Flags<[WrapperOnlyOption]>, MetaVarName<"--=">,
   HelpText<"Extra bitcode library to link">;
+def builtin_bitcode_EQ : Joined<["--"], "builtin-bitcode=">,
+  Flags<[WrapperOnlyOption]>, MetaVarName<"=">,
+  HelpText<"Perform a special internalizing link on the bitcode file. "
+   "This is necessary for some vendor libraries to be linked 
correctly">;
 def device_linker_args_EQ : Joined<["--"], "device-linker=">,
   Flags<[WrapperOnlyOption]>, MetaVarName<" or =">,
   HelpText<"Arguments to pass to the device linker invocation">;
+def clang_backend : Flag<["--"], "clang-backend">,
+  Flags<[WrapperOnlyOption]>,
+  HelpText<"Run the backend using clang rather than the LTO backend">;
 def dry_run : Flag<["--"], "dry-run">,
   Flags<[WrapperOnlyOption]>,
   HelpText<"Print program arguments without running">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -427,6 +427,17 @@
   for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
 CmdArgs.push_back(Args.MakeArgString("-Wl," + Arg));
 
+  for (StringRef Arg : Args.getAllArgValues(OPT_builtin_bitcode_EQ)) {
+if (llvm::Triple(Arg.split('=').first) == Triple)
+  CmdArgs.append({"-Xclang", "-mlink-builtin-bitcode", "-Xclang",
+  Args.MakeArgString(Arg.split('=').second)});
+  }
+
+  // The OpenMPOpt pass can introduce new calls and is expensive, we do not 
want
+  // this when running CodeGen through clang.
+  if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ))
+CmdArgs.append({"-mllvm", "-openmp-opt-disable"});
+
   if (Error Err = executeCommands(*ClangPath, CmdArgs))
 return std::move(Err);
 
@@ -629,7 +640,7 @@
   llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); });
 
   // LTO Module hook to output bitcode without running the backend.
-  SmallVector BitcodeOutput;
+  SmallVector BitcodeOutput;
   auto OutputBitcode = [&](size_t, const Module &M) {
 auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) +
   "-jit-" + Triple.getTriple(),
@@ -648,7 +659,9 @@
 
   // We assume visibility of the whole program if every input file was bitcode.
   auto Features = getTargetFeatures(BitcodeInputFiles);
-  auto LTOBackend = Args.hasArg(OPT_embed_bitcode)
+  auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
+Args.hasArg(OPT_builtin_bitcode_EQ) ||
+Args.hasArg(OPT_clang_backend)
 ? createLTO(Args, Features, OutputBitcode)
 : createLTO(Args, Features);
 
@@ -757,8 +770,12 @@
 return Error::success();
   }
 
-  // Append the new inputs to the device linker input.
-  for (StringRef File : Files)
+  // Append the new inputs to the device linker input. If the user requested an
+  // internalizing link we need to pass the bitcode to clang.
+  for (StringRef File :
+   Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)
+   ? BitcodeOutput
+   : Files)
 OutputFiles.push_back(File);
 
   return Error::success();
Index: clang/test/Driver/linker-wrapper.c
===
--- clang/test/Driver/linker-wrapper.c
+++ clang/test/Driver/linker-wrapper.c
@@ -130,3 +130,12 @@
 // RUN:   -o a.out 2>&1 | FileCheck %s --check-prefix=MISSING-LIBRARY
 
 // MISSING-LIBRARY: error: unable to find library -ldummy
+
+// RUN: clang-offload-packager -o %t.out \
+// RUN:   
--image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
+// RUN:   
--image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o 
-fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run 
--clang-backend \
+// RUN:   --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=CLANG-BACKEND

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-20 Thread Joseph Huber via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG8784b6a8540f: [Clang] Allow bitcode linking when the input 
is LLVM-IR (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

Files:
  clang/include/clang/CodeGen/CodeGenAction.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/test/CodeGen/link-bitcode-file.c
  clang/test/CodeGen/link-builtin-bitcode.c

Index: clang/test/CodeGen/link-builtin-bitcode.c
===
--- /dev/null
+++ clang/test/CodeGen/link-builtin-bitcode.c
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
+// RUN:   -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
+
+#ifdef BITCODE
+int foo(void) { return 42; }
+int x = 12;
+#endif
+
+extern int foo(void);
+extern int x;
+
+int bar() { return foo() + x; }
+//.
+// CHECK: @x = internal addrspace(1) global i32 12, align 4
+//.
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define dso_local i32 @bar
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:[[CALL:%.*]] = call i32 @foo()
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4
+// CHECK-NEXT:[[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]]
+// CHECK-NEXT:ret i32 [[ADD]]
+//
+//
+// CHECK: Function Attrs: convergent noinline nounwind optnone
+// CHECK-LABEL: define internal i32 @foo
+// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:ret i32 42
+//
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+//.
Index: clang/test/CodeGen/link-bitcode-file.c
===
--- clang/test/CodeGen/link-bitcode-file.c
+++ clang/test/CodeGen/link-bitcode-file.c
@@ -11,6 +11,14 @@
 // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \
 // RUN:-emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
 
+// Make sure we can perform the same options if the input is LLVM-IR
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm-bc -o %t-in.bc %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \
+// RUN: -O3 -emit-llvm -o - %t-in.bc | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \
+// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %t-in.bc \
+// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+
 int f(void);
 
 #ifdef BITCODE
Index: clang/lib/CodeGen/CodeGenAction.cpp
===
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,7 @@
 //===--===//
 
 #include "clang/CodeGen/CodeGenAction.h"
+#include "CGCall.h"
 #include "CodeGenModule.h"
 #include "CoverageMappingGen.h"
 #include "MacroPPCallbacks.h"
@@ -262,7 +263,7 @@
 }
 
 // Links each entry in LinkModules into our module.  Returns true on error.
-bool LinkInModules() {
+bool LinkInModules(llvm::Module *M) {
   for (auto &LM : LinkModules) {
 assert(LM.Module && "LinkModule does not actually have a module");
 if (LM.PropagateAttrs)
@@ -271,8 +272,8 @@
 // in LLVM IR.
 if (F.isIntrinsic())
   continue;
-Gen->CGM().mergeDefaultFunctionDefin

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-19 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 532748.
jhuber6 added a comment.

I'm not sure why this keeps failing on Windows and have no clue how to tell 
what's going wrong. The builder simply says

  c:\ws\w4\llvm-project\premerge-checks\build\bin\clang-linker-wrapper.exe: 
error: invalid argument

But I'm unsure what could be causing that since it works on Linux and we have 
plenty of other tests that Windows passes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the following BNF syntax:
+  //
+  //  target-id ::= '<arch> ( : <feature> ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this compilation. The input
+  // may be an AMDGPU target-id so we split off anything before the colon.
   const OptTable &Tbl = getOptTable();
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
+   Args.MakeArgString(
+   Input.front().getBinary()->getArch().split(':').first));
+  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
@@ -1001,23 +1005,13 @@
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered b

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-19 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a subscriber: ronlieb.
jhuber6 added a comment.

ping
@ronlieb


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153123: Fix diag for read-only target features

2023-06-16 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/test/OpenMP/driver-openmp-amdgpu.c:8
+// RUN:   --offload-device-only -o - 2>&1 | FileCheck --check-prefix=CHECK %s
+// CHECK-NOT: warning: feature flag {{.*}} is ignored since the feature is 
read only

jdoerfert wrote:
> yaxunl wrote:
> > arsenm wrote:
> > > Can you use -verify and expected-no-diagnostics? CHECK-NOT is really 
> > > fragile
> > -verify only works with clang -cc1, whereas we want to make sure there is 
> > no warning for the whole compilation invoked by clang driver
> let's then check-not for any warning?
It's probably reasonable that this shouldn't return any random errors so I'm 
fine with false positives.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153123/new/

https://reviews.llvm.org/D153123

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151349: [HIP] emit macro `__HIP_NO_IMAGE_SUPPORT`

2023-06-15 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

I've started seeing these errors when compiling for OpenMP targeting AMDGPU:

  $ clang input.c -fopenmp --offload-arch=gfx1030
  warning: feature flag '+image-insts' is ignored since the feature is read 
only [-Winvalid-command-line-argument]

Any suggestions for what might cause this?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151349/new/

https://reviews.llvm.org/D151349

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-15 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 531791.
jhuber6 added a comment.

Updating


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the following BNF syntax:
+  //
+  //  target-id ::= '<arch> ( : <feature> ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this compilation. The input
+  // may be an AMDGPU target-id so we split off anything before the colon.
   const OptTable &Tbl = getOptTable();
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
+   Args.MakeArgString(
+   Input.front().getBinary()->getArch().split(':').first));
+  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
@@ -1001,23 +1005,13 @@
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered by the runtime.
 Expected>
-linkAndWrapDeviceFiles(SmallVectorImpl &LinkerInputFiles,
+linkAndWrapDeviceFiles(SmallVector> &LinkerInputFiles,
const InputArgList &Args, char **Argv, int Argc) {
   llvm::TimeTraceScope TimeScope("Handle all device input");
 
-  DenseMap> InputMap;
-  for (auto &File : LinkerInputFi

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-15 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/test/Driver/linker-wrapper.c:48
+// AMDGPU-LINK-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx90a -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
+
 // RUN: clang-offload-packager -o %t.out \

yaxunl wrote:
> can we put some variables in the input bitcode so that we can check the 
> linked bitcode?
> 
> I would expect there will be only one linked bitcode for gfx90a:xnack+ and it 
> contains both variables.
> 
> I don't think it is a good idea to let the final object embed bitcode for 
> both gfx90a:xnack+ and gfx90a since that will result in an invalid container. 
> Therefore I think we should only do linking with target ID's from the first 
> container.
I can make a static library that's `gfx90a`, that covers the main case where we 
still link in the OpenMP runtime library that's compiled with `90a` if the user 
uses `90a:xnack+`. I'd need to place a random external variable to force it to 
extract however.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-15 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 531756.
jhuber6 added a comment.

Hopefully fixing test on Windows. I think it's fine to let the packager bundle
multiple of these now since it's caught in `clang`. So if the user really wants
to force it we should allow them to since the bundler format is just a simple
fatbinary.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the following BNF syntax:
+  //
+  //  target-id ::= '<arch> ( : <feature> ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this compilation. The input
+  // may be an AMDGPU target-id so we split off anything before the colon.
   const OptTable &Tbl = getOptTable();
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
+   Args.MakeArgString(
+   Input.front().getBinary()->getArch().split(':').first));
+  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
@@ -1001,23 +1005,13 @@
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered by the runtime.
 Expected>
-linkAndWrapDeviceFiles(SmallVectorImpl &LinkerInputFiles,
+linkAndWrapD

[PATCH] D152965: [OpenMP] Correctly diagnose conflicting target identifierers for AMDGPU

2023-06-15 Thread Joseph Huber via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rGe96bec9cd8e1: [OpenMP] Correctly diagnose conflicting target 
identifierers for AMDGPU (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152965/new/

https://reviews.llvm.org/D152965

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -66,3 +66,7 @@
 // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp 
--offload-arch=gfx90a:sramecc-:xnack+ \
 // RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
 // CHECK-TARGET-ID: 
clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack
+
+// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp 
--offload-arch=gfx90a,gfx90a:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR
+// CHECK-TARGET-ID-ERROR: error: invalid offload arch combinations: 'gfx90a' 
and 'gfx90a:xnack+'
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4313,8 +4313,8 @@
 /// incompatible pair if a conflict occurs.
 static std::optional>
 getConflictOffloadArchCombination(const llvm::DenseSet &Archs,
-  Action::OffloadKind Kind) {
-  if (Kind != Action::OFK_HIP)
+  llvm::Triple Triple) {
+  if (!Triple.isAMDGPU())
 return std::nullopt;
 
   std::set ArchSet;
@@ -4399,7 +4399,8 @@
 }
   }
 
-  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) {
+  if (auto ConflictingArchs =
+  getConflictOffloadArchCombination(Archs, TC->getTriple())) {
 C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
 << ConflictingArchs->first << ConflictingArchs->second;
 C.setContainsError();


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -66,3 +66,7 @@
 // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
 // RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
 // CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack
+
+// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR
+// CHECK-TARGET-ID-ERROR: error: invalid offload arch combinations: 'gfx90a' and 'gfx90a:xnack+'
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4313,8 +4313,8 @@
 /// incompatible pair if a conflict occurs.
 static std::optional>
 getConflictOffloadArchCombination(const llvm::DenseSet &Archs,
-  Action::OffloadKind Kind) {
-  if (Kind != Action::OFK_HIP)
+  llvm::Triple Triple) {
+  if (!Triple.isAMDGPU())
 return std::nullopt;
 
   std::set ArchSet;
@@ -4399,7 +4399,8 @@
 }
   }
 
-  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) {
+  if (auto ConflictingArchs =
+  getConflictOffloadArchCombination(Archs, TC->getTriple())) {
 C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
 << ConflictingArchs->first << ConflictingArchs->second;
 C.setContainsError();
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152965: [OpenMP] Correctly diagnose conflicting target identifierers for AMDGPU

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 531531.
jhuber6 added a comment.

Clang format


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152965/new/

https://reviews.llvm.org/D152965

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -66,3 +66,7 @@
 // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp 
--offload-arch=gfx90a:sramecc-:xnack+ \
 // RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
 // CHECK-TARGET-ID: 
clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack
+
+// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp 
--offload-arch=gfx90a,gfx90a:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR
+// CHECK-TARGET-ID-ERROR: error: invalid offload arch combinations: 'gfx90a' 
and 'gfx90a:xnack+'
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4313,8 +4313,8 @@
 /// incompatible pair if a conflict occurs.
 static std::optional>
 getConflictOffloadArchCombination(const llvm::DenseSet &Archs,
-  Action::OffloadKind Kind) {
-  if (Kind != Action::OFK_HIP)
+  llvm::Triple Triple) {
+  if (!Triple.isAMDGPU())
 return std::nullopt;
 
   std::set ArchSet;
@@ -4399,7 +4399,8 @@
 }
   }
 
-  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) {
+  if (auto ConflictingArchs =
+  getConflictOffloadArchCombination(Archs, TC->getTriple())) {
 C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
 << ConflictingArchs->first << ConflictingArchs->second;
 C.setContainsError();


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -66,3 +66,7 @@
 // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
 // RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
 // CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack
+
+// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR
+// CHECK-TARGET-ID-ERROR: error: invalid offload arch combinations: 'gfx90a' and 'gfx90a:xnack+'
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4313,8 +4313,8 @@
 /// incompatible pair if a conflict occurs.
 static std::optional>
 getConflictOffloadArchCombination(const llvm::DenseSet &Archs,
-  Action::OffloadKind Kind) {
-  if (Kind != Action::OFK_HIP)
+  llvm::Triple Triple) {
+  if (!Triple.isAMDGPU())
 return std::nullopt;
 
   std::set ArchSet;
@@ -4399,7 +4399,8 @@
 }
   }
 
-  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) {
+  if (auto ConflictingArchs =
+  getConflictOffloadArchCombination(Archs, TC->getTriple())) {
 C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
 << ConflictingArchs->first << ConflictingArchs->second;
 C.setContainsError();
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152965: [OpenMP] Correctly diagnose conflicting target identifierers for AMDGPU

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, yaxunl, jdoerfert.
Herald added subscribers: sunshaoce, kerbowa, guansong, tpr, dstuttard, 
jvesely, kzhuravl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1, MaskRay, wdng.
Herald added a project: clang.

There are static checks on the target identifiers allowed in a single
TU. Previously these checks were only applied to HIP even though they
should be the same for OpenMP targeting AMDGPU. Simply enable these
checks for OpenMP.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D152965

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -66,3 +66,7 @@
 // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp 
--offload-arch=gfx90a:sramecc-:xnack+ \
 // RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
 // CHECK-TARGET-ID: 
clang-offload-packager{{.*}}arch=gfx90a:sramecc-:xnack+,kind=openmp,feature=-sramecc,feature=+xnack
+
+// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp 
--offload-arch=gfx90a,gfx90a:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR
+// CHECK-TARGET-ID-ERROR: error: invalid offload arch combinations: 'gfx90a' 
and 'gfx90a:xnack+'
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4313,8 +4313,8 @@
 /// incompatible pair if a conflict occurs.
 static std::optional>
 getConflictOffloadArchCombination(const llvm::DenseSet &Archs,
-  Action::OffloadKind Kind) {
-  if (Kind != Action::OFK_HIP)
+  llvm::Triple Triple) {
+  if (!Triple.isAMDGPU())
 return std::nullopt;
 
   std::set ArchSet;
@@ -4399,7 +4399,8 @@
 }
   }
 
-  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) {
+  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs,
+TC->getTriple())) {
 C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
 << ConflictingArchs->first << ConflictingArchs->second;
 C.setContainsError();


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -66,3 +66,7 @@
 // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
 // RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
 // CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a:sramecc-:xnack+,kind=openmp,feature=-sramecc,feature=+xnack
+
+// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR
+// CHECK-TARGET-ID-ERROR: error: invalid offload arch combinations: 'gfx90a' and 'gfx90a:xnack+'
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4313,8 +4313,8 @@
 /// incompatible pair if a conflict occurs.
 static std::optional>
 getConflictOffloadArchCombination(const llvm::DenseSet &Archs,
-  Action::OffloadKind Kind) {
-  if (Kind != Action::OFK_HIP)
+  llvm::Triple Triple) {
+  if (!Triple.isAMDGPU())
 return std::nullopt;
 
   std::set ArchSet;
@@ -4399,7 +4399,8 @@
 }
   }
 
-  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) {
+  if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs,
+TC->getTriple())) {
 C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
 << ConflictingArchs->first << ConflictingArchs->second;
 C.setContainsError();
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D152882#4422797 , @yaxunl wrote:

> In D152882#4422788 , @jhuber6 wrote:
>
>> In D152882#4421138 , @yaxunl wrote:
>>
>>> However, bitcode of  target ID gfx90a:xnack+ is allowed to link in bitcode 
>>> of target ID gfx90a as long as they are from different containers. So there 
>>> are two rules about target ID: 1. compatibility rules for objects/bitcode 
>>> in the same container 2. compatibility rules for linking bitcode of 
>>> different target ID's.
>>>
>>> we need tests for both rules.
>>
>> So I'm wondering why I'm allowed to do 
>> `--offload-arch=gfx90a,gfx90a:xnack+`. Shouldn't that be caught by 
>> `getConflictTargetIDCombination`? That seems like the proper place to 
>> diagnose this.
>
> clang --offload-arch=gfx90a,gfx90a:xnack+ -c a.hip
> clang: error: invalid offload arch combinations: 'gfx90a' and 'gfx90a:xnack+' 
> (for a specific processor, a feature should either exist in all offload 
> archs, or not exist in any offload archs)
>
> At least it is caught for HIP. OpenMP may not check that.



  if (Kind != Action::OFK_HIP)
return std::nullopt;

Yes, this would be the culprit. Guessing we shouldn't do that?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D152882#4421138 , @yaxunl wrote:

> However, bitcode of  target ID gfx90a:xnack+ is allowed to link in bitcode of 
> target ID gfx90a as long as they are from different containers. So there are 
> two rules about target ID: 1. compatibility rules for objects/bitcode in the 
> same container 2. compatibility rules for linking bitcode of different target 
> ID's.
>
> we need tests for both rules.

So I'm wondering why I'm allowed to do `--offload-arch=gfx90a,gfx90a:xnack+`. 
Shouldn't that be caught by `getConflictTargetIDCombination`? That seems like 
the proper place to diagnose this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 531520.
jhuber6 added a comment.
Herald added subscribers: kerbowa, jvesely.

Adjusting test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the following BNF syntax:
+  //
+  //  target-id ::= '<arch> ( : <feature> ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this compilation. The input
+  // may be an AMDGPU target-id so we split off anything before the colon.
   const OptTable &Tbl = getOptTable();
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
+   Args.MakeArgString(
+   Input.front().getBinary()->getArch().split(':').first));
+  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_full_arch_EQ),
Args.MakeArgString(Input.front().getBinary()->getArch()));
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
@@ -1001,23 +1005,13 @@
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered by the runtime.
 Expected>
-linkAndWrapDeviceFiles(SmallVectorImpl &LinkerInputFiles,
+linkAndWrapDeviceFiles(SmallVector> &LinkerInputFiles,
const InputArgList &Args, char **Argv, int Argc) {
   llvm::TimeTraceScope TimeScope("Handle all device input");
 
-  Den

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-14 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D152882#4421138 , @yaxunl wrote:

> The design of target ID put constraints on target ID's that can be embedded 
> into one executable 
> https://clang.llvm.org/docs/ClangOffloadBundler.html#bundle-entry-id . For 
> example, gfx90a and gfx90a:xnack+ cannot be embedded into one executable 
> since this will cause difficulty for runtime to choose device binary to run, 
> especially when there are multiple target ID features. clang does not allow 
> --offload-arch=gfx90a and --offload-arch=gfx90a:xnack+ to be used together to 
> compile HIP programs. It would be preferred for offload-packager to enforce 
> this constraint too.
>
> However, bitcode of  target ID gfx90a:xnack+ is allowed to link in bitcode of 
> target ID gfx90a as long as they are from different containers. So there are 
> two rules about target ID: 1. compatibility rules for objects/bitcode in the 
> same container 2. compatibility rules for linking bitcode of different target 
> ID's.
>
> we need tests for both rules.

Should that be a follow-up patch, or one included here? It definitely 
influences the test, so I can change that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152882/new/

https://reviews.llvm.org/D152882

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152882: [LinkerWrapper] Support device binaries in multiple link jobs

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, tra, yaxunl.
Herald added a subscriber: tpr.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, jplehr, sstefan1, MaskRay.
Herald added a reviewer: jdoerfert.
Herald added projects: clang, LLVM.

Currently the linker wrapper strictly assigns a single input binary to a
single link job based off of its input architecture. This is not
sufficient to implement the AMDGPU target ID correctly as this could
have many compatible architectures participating in multiple links.

This patch introduces the ability to have a single binary input be
linked multiple times. For example, given the following, we will now
link in the static library where previously we would not.

  clang foo.c -fopenmp --offload-arch=gfx90a
  llvm-ar rcs libfoo.a foo.o
  clang foo.c -fopenmp --offload-arch=gfx90a:xnack+ libfoo.a

This also means that given the following we will link the basic input
twice, but that's on the user for providing two versions.

  clang foo.c -fopenmp --offload-arch=gfx90a,gfx90a:xnack+

This should allow us to also support a "generic" target in the future
for IR without a specific architecture.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D152882

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
  llvm/include/llvm/Object/OffloadBinary.h

Index: llvm/include/llvm/Object/OffloadBinary.h
===
--- llvm/include/llvm/Object/OffloadBinary.h
+++ llvm/include/llvm/Object/OffloadBinary.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_OBJECT_OFFLOADBINARY_H
 #define LLVM_OBJECT_OFFLOADBINARY_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/Binary.h"
@@ -155,12 +156,22 @@
 /// owns its memory.
 class OffloadFile : public OwningBinary {
 public:
+  /// An ordered pair of the target triple and the architecture.
   using TargetID = std::pair;
 
   OffloadFile(std::unique_ptr Binary,
   std::unique_ptr Buffer)
   : OwningBinary(std::move(Binary), std::move(Buffer)) {}
 
+  Expected copy() const {
+std::unique_ptr Buffer = MemoryBuffer::getMemBufferCopy(
+getBinary()->getMemoryBufferRef().getBuffer());
+auto NewBinaryOrErr = OffloadBinary::create(*Buffer);
+if (!NewBinaryOrErr)
+  return NewBinaryOrErr.takeError();
+return OffloadFile(std::move(*NewBinaryOrErr), std::move(Buffer));
+  }
+
   /// We use the Triple and Architecture pair to group linker inputs together.
   /// This conversion function lets us use these inputs in a hash-map.
   operator TargetID() const {
@@ -168,6 +179,28 @@
   }
 };
 
+/// Queries if the target \p LHS is compatible with \p RHS for linking purposes.
+inline bool areTargetsCompatible(const OffloadFile::TargetID LHS,
+ const OffloadFile::TargetID RHS) {
+  if (LHS == RHS)
+return true;
+
+  // If the target is AMD we check the target IDs for compatibility. A target id
+  // is a string conforming to the following BNF syntax:
+  //
+  //  target-id ::= '<arch> ( : <feature> ( '+' | '-' ) )*'
+  //
+  // This is used to link mutually compatible architectures together.
+  llvm::Triple T(LHS.first);
+  if (!T.isAMDGPU())
+return false;
+
+  // The targets are compatible if the architecture is a subset of the other.
+  if (RHS.second.contains(LHS.second))
+return true;
+  return false;
+}
+
 /// Extracts embedded device offloading code from a memory \p Buffer to a list
 /// of \p Binaries.
 Error extractOffloadBinaries(MemoryBufferRef Buffer,
Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -67,6 +67,9 @@
 def arch_EQ : Joined<["--"], "arch=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device subarchitecture">;
+def full_arch_EQ : Joined<["--"], "full-arch=">,
+  Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
+  HelpText<"The fully qualifier device subarchitecture for AMD's target ID">;
 def triple_EQ : Joined<["--"], "triple=">,
   Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">,
   HelpText<"The device target triple">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -969,9 +969,13 @@
   for (Arg *A : Args)
 DAL.append(A);
 
-  // Set the subarchitecture and target triple for this compilation.
+  // Set the subarchitecture and target triple for this comp

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:2001
 
-void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
- bool HasOptnone,
- bool AttrOnCallSite,
- llvm::AttrBuilder &FuncAttrs) 
{
-  getTrivialDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite,
-  FuncAttrs);
-  if (!AttrOnCallSite) {
-// If we're just getting the default, get the default values for mergeable
-// attributes.
-addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs);
-  }
-}
+/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as
+/// though we had emitted it ourselves. We remove any attributes on F that

JonChesterfield wrote:
> I'm used to this sort of copy-some-args-and-not-others showing up in bug 
> reports. Could this patch be re-ordered to make it apparent what functional 
> changes are happening relative to the current head?
It's the same that will get added in the `GetCPUAndFeaturesAttributes` 
function if you pass in a null declaration.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139730: [OpenMP][DeviceRTL][AMDGPU] Support code object version 5

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D139730#4418630 , @arsenm wrote:

> In D139730#3991628 , 
> @JonChesterfield wrote:
>
>> We currently require that library for libm, which I'm also not thrilled 
>> about, but at least you can currently build and run openmp programs (that 
>> don't use libm, like much of our tests) without it.
>
> The ABI isn't defined in terms of what device-libs does. It's fixed offsets 
> off of pointers accessible through amdgcn intrinsics. You can also just 
> directly emit the same IR, these functions aren't complicated

This is the suggestion I've talked with @saiislam about. I think we should just 
copy the magic intrinsics that are being queried here. I'm assuming we don't 
need to bother with supporting both v4 and v5 so we can just make the switch 
all at once.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139730/new/

https://reviews.llvm.org/D139730

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 530946.
jhuber6 added a comment.

Format


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

Files:
  clang/include/clang/CodeGen/CodeGenAction.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/test/CodeGen/link-bitcode-file.c
  clang/test/CodeGen/link-builtin-bitcode.c

Index: clang/test/CodeGen/link-builtin-bitcode.c
===
--- /dev/null
+++ clang/test/CodeGen/link-builtin-bitcode.c
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
+// RUN:   -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
+
+#ifdef BITCODE
+int foo(void) { return 42; }
+int x = 12;
+#endif
+
+extern int foo(void);
+extern int x;
+
+int bar() { return foo() + x; }
+//.
+// CHECK: @x = internal addrspace(1) global i32 12, align 4
+//.
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define dso_local i32 @bar
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:[[CALL:%.*]] = call i32 @foo()
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4
+// CHECK-NEXT:[[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]]
+// CHECK-NEXT:ret i32 [[ADD]]
+//
+//
+// CHECK: Function Attrs: convergent noinline nounwind optnone
+// CHECK-LABEL: define internal i32 @foo
+// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:ret i32 42
+//
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+//.
Index: clang/test/CodeGen/link-bitcode-file.c
===
--- clang/test/CodeGen/link-bitcode-file.c
+++ clang/test/CodeGen/link-bitcode-file.c
@@ -11,6 +11,14 @@
 // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \
 // RUN:-emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
 
+// Make sure we can perform the same options if the input is LLVM-IR
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm-bc -o %t-in.bc %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \
+// RUN: -O3 -emit-llvm -o - %t-in.bc | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \
+// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %t-in.bc \
+// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+
 int f(void);
 
 #ifdef BITCODE
Index: clang/lib/CodeGen/CodeGenAction.cpp
===
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,7 @@
 //===--===//
 
 #include "clang/CodeGen/CodeGenAction.h"
+#include "CGCall.h"
 #include "CodeGenModule.h"
 #include "CoverageMappingGen.h"
 #include "MacroPPCallbacks.h"
@@ -262,7 +263,7 @@
 }
 
 // Links each entry in LinkModules into our module.  Returns true on error.
-bool LinkInModules() {
+bool LinkInModules(llvm::Module *M) {
   for (auto &LM : LinkModules) {
 assert(LM.Module && "LinkModule does not actually have a module");
 if (LM.PropagateAttrs)
@@ -271,8 +272,8 @@
 // in LLVM IR.
 if (F.isIntrinsic())
   continue;
-Gen->CGM().mergeDefaultFunctionDefinitionAttributes(F,
-LM.Internalize);
+

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 530944.
jhuber6 added a comment.

Removing


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

Files:
  clang/include/clang/CodeGen/CodeGenAction.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/test/CodeGen/link-bitcode-file.c
  clang/test/CodeGen/link-builtin-bitcode.c

Index: clang/test/CodeGen/link-builtin-bitcode.c
===
--- /dev/null
+++ clang/test/CodeGen/link-builtin-bitcode.c
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
+// RUN:   -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
+
+#ifdef BITCODE
+int foo(void) { return 42; }
+int x = 12;
+#endif
+
+extern int foo(void);
+extern int x;
+
+int bar() { return foo() + x; }
+//.
+// CHECK: @x = internal addrspace(1) global i32 12, align 4
+//.
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define dso_local i32 @bar
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:[[CALL:%.*]] = call i32 @foo()
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4
+// CHECK-NEXT:[[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]]
+// CHECK-NEXT:ret i32 [[ADD]]
+//
+//
+// CHECK: Function Attrs: convergent noinline nounwind optnone
+// CHECK-LABEL: define internal i32 @foo
+// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:ret i32 42
+//
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+//.
Index: clang/test/CodeGen/link-bitcode-file.c
===
--- clang/test/CodeGen/link-bitcode-file.c
+++ clang/test/CodeGen/link-bitcode-file.c
@@ -11,6 +11,14 @@
 // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \
 // RUN:-emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
 
+// Make sure we can perform the same options if the input is LLVM-IR
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm-bc -o %t-in.bc %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \
+// RUN: -O3 -emit-llvm -o - %t-in.bc | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \
+// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %t-in.bc \
+// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+
 int f(void);
 
 #ifdef BITCODE
Index: clang/lib/CodeGen/CodeGenAction.cpp
===
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,7 @@
 //===--===//
 
 #include "clang/CodeGen/CodeGenAction.h"
+#include "CGCall.h"
 #include "CodeGenModule.h"
 #include "CoverageMappingGen.h"
 #include "MacroPPCallbacks.h"
@@ -262,7 +263,7 @@
 }
 
 // Links each entry in LinkModules into our module.  Returns true on error.
-bool LinkInModules() {
+bool LinkInModules(llvm::Module *M) {
   for (auto &LM : LinkModules) {
 assert(LM.Module && "LinkModule does not actually have a module");
 if (LM.PropagateAttrs)
@@ -271,8 +272,8 @@
 // in LLVM IR.
 if (F.isIntrinsic())
   continue;
-Gen->CGM().mergeDefaultFunctionDefinitionAttributes(F,
-LM.Internalize);
+

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 marked an inline comment as not done.
jhuber6 added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:2105-2106
+  llvm::AttrBuilder FuncAttrs(F.getContext());
+  GetCPUAndFeaturesAttributes(GlobalDecl(), FuncAttrs,
+  /*AddTargetFeatures=*/false);
+

yaxunl wrote:
> should this be removed? seems redundant
I wasn't entirely sure, there's a lot of work done in that function but pretty 
much all of it goes away since we just use `GlobalDecl()`. So for the version 
that doesn't have access to CGM I copied out the only two relevant lines and 
put them in line 2063. We could probably make this use the same code but I 
wasn't sure if there was something else we might need it for.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 marked an inline comment as not done.
jhuber6 added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:2052-2106
+void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes(
+llvm::Function &F, const CodeGenOptions CodeGenOpts,
+const LangOptions &LangOpts, const TargetOptions &TargetOpts,
+bool WillInternalize) {
+
+  llvm::AttrBuilder FuncAttrs(F.getContext());
+  if (!TargetOpts.CPU.empty())

yaxunl wrote:
> can we reorder these functions to minimize the diffs? Also some comments 
> about the difference among these functions may help.
I added some comments. These can't be reordered because they're new functions, 
I simply added a layer of indirection to go through a common helper so we can 
call the main bulk of the code without needing the full `CGM`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-13 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 530919.
jhuber6 marked 2 inline comments as done.
jhuber6 added a comment.

Addressing comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

Files:
  clang/include/clang/CodeGen/CodeGenAction.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/test/CodeGen/link-bitcode-file.c
  clang/test/CodeGen/link-builtin-bitcode.c

Index: clang/test/CodeGen/link-builtin-bitcode.c
===
--- /dev/null
+++ clang/test/CodeGen/link-builtin-bitcode.c
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
+// RUN:   -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
+
+#ifdef BITCODE
+int foo(void) { return 42; }
+int x = 12;
+#endif
+
+extern int foo(void);
+extern int x;
+
+int bar() { return foo() + x; }
+//.
+// CHECK: @x = internal addrspace(1) global i32 12, align 4
+//.
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define dso_local i32 @bar
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:[[CALL:%.*]] = call i32 @foo()
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4
+// CHECK-NEXT:[[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]]
+// CHECK-NEXT:ret i32 [[ADD]]
+//
+//
+// CHECK: Function Attrs: convergent noinline nounwind optnone
+// CHECK-LABEL: define internal i32 @foo
+// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:ret i32 42
+//
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+//.
Index: clang/test/CodeGen/link-bitcode-file.c
===
--- clang/test/CodeGen/link-bitcode-file.c
+++ clang/test/CodeGen/link-bitcode-file.c
@@ -11,6 +11,14 @@
 // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \
 // RUN:-emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
 
+// Make sure we can perform the same options if the input is LLVM-IR
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm-bc -o %t-in.bc %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \
+// RUN: -O3 -emit-llvm -o - %t-in.bc | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \
+// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %t-in.bc \
+// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+
 int f(void);
 
 #ifdef BITCODE
Index: clang/lib/CodeGen/CodeGenAction.cpp
===
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,7 @@
 //===--===//
 
 #include "clang/CodeGen/CodeGenAction.h"
+#include "CGCall.h"
 #include "CodeGenModule.h"
 #include "CoverageMappingGen.h"
 #include "MacroPPCallbacks.h"
@@ -262,7 +263,7 @@
 }
 
 // Links each entry in LinkModules into our module.  Returns true on error.
-bool LinkInModules() {
+bool LinkInModules(llvm::Module *M) {
   for (auto &LM : LinkModules) {
 assert(LM.Module && "LinkModule does not actually have a module");
 if (LM.PropagateAttrs)
@@ -271,8 +272,8 @@
 // in LLVM IR.
 if (F.isIntrinsic())
   continue;
-Gen->CGM().mergeDefaultFunctionDefinitionAttributes(F,
-

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-12 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

ping


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152461: [LTO] Add a config option to make the final output bitcode

2023-06-08 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 529682.
jhuber6 added a comment.

Remove leftover debug


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152461/new/

https://reviews.llvm.org/D152461

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  llvm/include/llvm/LTO/Config.h
  llvm/lib/LTO/LTOBackend.cpp

Index: llvm/lib/LTO/LTOBackend.cpp
===
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -366,6 +366,17 @@
   if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
 return;
 
+  if (Conf.OutputPreCodegenBitcode) {
+Expected> StreamOrErr =
+AddStream(Task, Mod.getModuleIdentifier());
+if (Error Err = StreamOrErr.takeError())
+  report_fatal_error(std::move(Err));
+std::unique_ptr &Stream = *StreamOrErr;
+
+WriteBitcodeToFile(Mod, *Stream->OS);
+return;
+  }
+
   if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
 llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
/*EmbedBitcode*/ true,
Index: llvm/include/llvm/LTO/Config.h
===
--- llvm/include/llvm/LTO/Config.h
+++ llvm/include/llvm/LTO/Config.h
@@ -84,6 +84,10 @@
   /// want to know a priori all possible output files.
   bool AlwaysEmitRegularLTOObj = false;
 
+  /// Write the pre-codegen module to the output without performing codegen.
+  /// Useful for post-processing on the fully linked LTO module.
+  bool OutputPreCodegenBitcode = false;
+
   /// Allows non-imported definitions to get the potentially more constraining
   /// visibility from the prevailing definition. FromPrevailing is the default
   /// because it works for many binary formats. ELF can use the more optimized
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -509,10 +509,8 @@
   return UnifiedFeatures;
 }
 
-template >
-std::unique_ptr createLTO(
-const ArgList &Args, const std::vector &Features,
-ModuleHook Hook = [](size_t, const Module &) { return true; }) {
+std::unique_ptr createLTO(const ArgList &Args,
+const std::vector &Features) {
   const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
   StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
   lto::Config Conf;
@@ -566,7 +564,9 @@
   return true;
 };
   }
-  Conf.PostOptModuleHook = Hook;
+  Conf.OutputPreCodegenBitcode = Args.hasArg(OPT_embed_bitcode) ||
+ Args.hasArg(OPT_builtin_bitcode_EQ) ||
+ Args.hasArg(OPT_clang_backend);
   Conf.CGFileType =
   (Triple.isNVPTX() || SaveTemps) ? CGFT_AssemblyFile : CGFT_ObjectFile;
 
@@ -639,31 +639,9 @@
   // Remove all the bitcode files that we moved from the original input.
   llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); });
 
-  // LTO Module hook to output bitcode without running the backend.
-  SmallVector BitcodeOutput;
-  auto OutputBitcode = [&](size_t, const Module &M) {
-auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) +
-  "-jit-" + Triple.getTriple(),
-  "bc");
-if (!TempFileOrErr)
-  reportError(TempFileOrErr.takeError());
-
-std::error_code EC;
-raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None);
-if (EC)
-  reportError(errorCodeToError(EC));
-WriteBitcodeToFile(M, LinkedBitcode);
-BitcodeOutput.push_back(*TempFileOrErr);
-return false;
-  };
-
   // We assume visibility of the whole program if every input file was bitcode.
   auto Features = getTargetFeatures(BitcodeInputFiles);
-  auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
-Args.hasArg(OPT_builtin_bitcode_EQ) ||
-Args.hasArg(OPT_clang_backend)
-? createLTO(Args, Features, OutputBitcode)
-: createLTO(Args, Features);
+  auto LTOBackend = createLTO(Args, Features);
 
   // We need to resolve the symbols so the LTO backend knows which symbols need
   // to be kept or can be internalized. This is a simplified symbol resolution
@@ -733,12 +711,15 @@
   // Run the LTO job to compile the bitcode.
   size_t MaxTasks = LTOBackend->getMaxTasks();
   SmallVector Files(MaxTasks);
-  auto AddStream =
-  [&](size_t Task,
-  const Twine &ModuleName) -> std::unique_ptr {
+  auto AddStream = [&](size_t Task, const Twine &ModuleName) {
+bool OutputBitcode = Args.hasArg(OPT_embed_bitcode) ||
+ Args.hasArg(OPT_builtin_bitcode_EQ) ||
+ Args.hasArg(OPT_c

[PATCH] D152461: [LTO] Add a config option to make the final output bitcode

2023-06-08 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, MaskRay, fhahn, tra, yaxunl, 
JonChesterfield, tianshilei1992.
Herald added subscribers: ormris, StephenFan, steven_wu, hiraditya, inglorion.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

The LTO pass currently only offers assembly or object file outputs. We
can obtain the final LLVM output using the hooks, but this is inconvenient
and disrupts the flow of the compilation. This patch proposes adding a
convenience function to perform this if the user requires it. We would
like this in order to retain the post-optimized LTO bitcode for
LLVM offloading compilations, such as JIT, builtin linking, or
performing a relocatable link on LLVM-IR.

Depends on D152442 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D152461

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  llvm/include/llvm/LTO/Config.h
  llvm/lib/LTO/LTOBackend.cpp

Index: llvm/lib/LTO/LTOBackend.cpp
===
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -366,6 +366,17 @@
   if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
 return;
 
+  if (Conf.OutputPreCodegenBitcode) {
+Expected> StreamOrErr =
+AddStream(Task, Mod.getModuleIdentifier());
+if (Error Err = StreamOrErr.takeError())
+  report_fatal_error(std::move(Err));
+std::unique_ptr &Stream = *StreamOrErr;
+
+WriteBitcodeToFile(Mod, *Stream->OS);
+return;
+  }
+
   if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
 llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
/*EmbedBitcode*/ true,
Index: llvm/include/llvm/LTO/Config.h
===
--- llvm/include/llvm/LTO/Config.h
+++ llvm/include/llvm/LTO/Config.h
@@ -84,6 +84,10 @@
   /// want to know a priori all possible output files.
   bool AlwaysEmitRegularLTOObj = false;
 
+  /// Write the pre-codegen module to the output without performing codegen.
+  /// Useful for post-processing on the fully linked LTO module.
+  bool OutputPreCodegenBitcode = false;
+
   /// Allows non-imported definitions to get the potentially more constraining
   /// visibility from the prevailing definition. FromPrevailing is the default
   /// because it works for many binary formats. ELF can use the more optimized
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -509,10 +509,8 @@
   return UnifiedFeatures;
 }
 
-template >
-std::unique_ptr createLTO(
-const ArgList &Args, const std::vector &Features,
-ModuleHook Hook = [](size_t, const Module &) { return true; }) {
+std::unique_ptr createLTO(const ArgList &Args,
+const std::vector &Features) {
   const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
   StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
   lto::Config Conf;
@@ -566,7 +564,9 @@
   return true;
 };
   }
-  Conf.PostOptModuleHook = Hook;
+  Conf.OutputPreCodegenBitcode = Args.hasArg(OPT_embed_bitcode) ||
+ Args.hasArg(OPT_builtin_bitcode_EQ) ||
+ Args.hasArg(OPT_clang_backend);
   Conf.CGFileType =
   (Triple.isNVPTX() || SaveTemps) ? CGFT_AssemblyFile : CGFT_ObjectFile;
 
@@ -639,31 +639,9 @@
   // Remove all the bitcode files that we moved from the original input.
   llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); });
 
-  // LTO Module hook to output bitcode without running the backend.
-  SmallVector BitcodeOutput;
-  auto OutputBitcode = [&](size_t, const Module &M) {
-auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) +
-  "-jit-" + Triple.getTriple(),
-  "bc");
-if (!TempFileOrErr)
-  reportError(TempFileOrErr.takeError());
-
-std::error_code EC;
-raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None);
-if (EC)
-  reportError(errorCodeToError(EC));
-WriteBitcodeToFile(M, LinkedBitcode);
-BitcodeOutput.push_back(*TempFileOrErr);
-return false;
-  };
-
   // We assume visibility of the whole program if every input file was bitcode.
   auto Features = getTargetFeatures(BitcodeInputFiles);
-  auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
-Args.hasArg(OPT_builtin_bitcode_EQ) ||
-Args.hasArg(OPT_clang_backend)
-? createLTO(Args, Features, OutputBitcode)
-

[PATCH] D152442: [LinkerWrapper] Support linking vendor bitcode late

2023-06-08 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, tra, yaxunl, tianshilei1992, 
jdoerfert, gregrodgers, saiislam.
Herald added a subscriber: tpr.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1.
Herald added a project: clang.

The GPU vendors currently provide bitcode files for their device
runtime. These files need to be handled specially as they are not built
to be linked in with a standard `llvm-link` call or through LTO linking.
This patch adds an alternative to use the existing clang handling of
these libraries that does the necessary magic to make this work.

We do this by causing the LTO backend to emit bitcode before running the
backend. We then pass this through to clang, which uses the existing
support that has been fixed to support this by D152391. The backend
will then be run with the merged module.

This patch adds the `--builtin-bitcode=<triple>=file.bc` option to specify a
single file, or just `--clang-backend` to let the toolchain handle its defaults
(currently nothing for NVPTX and the ROCm device libs for AMDGPU). This may have
a performance impact due to running the optimizations again; we could
potentially disable optimizations in LTO and only do the linking if this is an
issue.

This should allow us to resolve issues when relying on the `linker-wrapper` to
do a late linking that may depend on vendor libraries.

Depends on D152391 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D152442

Files:
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td


Index: clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
===
--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -25,9 +25,16 @@
 def bitcode_library_EQ : Joined<["--"], "bitcode-library=">,
   Flags<[WrapperOnlyOption]>, MetaVarName<"--=">,
   HelpText<"Extra bitcode library to link">;
+def builtin_bitcode_EQ : Joined<["--"], "builtin-bitcode=">,
+  Flags<[WrapperOnlyOption]>, MetaVarName<"=">,
+  HelpText<"Perform a special internalizing link on the bitcode file. "
+   "This is necessary for some vendor libraries to be linked 
correctly">;
 def device_linker_args_EQ : Joined<["--"], "device-linker=">,
   Flags<[WrapperOnlyOption]>, MetaVarName<" or =">,
   HelpText<"Arguments to pass to the device linker invocation">;
+def clang_backend : Flag<["--"], "clang-backend">,
+  Flags<[WrapperOnlyOption]>,
+  HelpText<"Run the backend using clang rather than the LTO backend">;
 def dry_run : Flag<["--"], "dry-run">,
   Flags<[WrapperOnlyOption]>,
   HelpText<"Print program arguments without running">;
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -427,6 +427,17 @@
   for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
 CmdArgs.push_back(Args.MakeArgString("-Wl," + Arg));
 
+  for (StringRef Arg : Args.getAllArgValues(OPT_builtin_bitcode_EQ)) {
+if (llvm::Triple(Arg.split('=').first) == Triple)
+  CmdArgs.append({"-Xclang", "-mlink-builtin-bitcode", "-Xclang",
+  Args.MakeArgString(Arg.split('=').second)});
+  }
+
+  // The OpenMPOpt pass can introduce new calls and is expensive, we do not 
want
+  // this when running CodeGen through clang.
+  if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ))
+CmdArgs.append({"-mllvm", "-openmp-opt-disable"});
+
   if (Error Err = executeCommands(*ClangPath, CmdArgs))
 return std::move(Err);
 
@@ -629,7 +640,7 @@
   llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); });
 
   // LTO Module hook to output bitcode without running the backend.
-  SmallVector BitcodeOutput;
+  SmallVector BitcodeOutput;
   auto OutputBitcode = [&](size_t, const Module &M) {
 auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) +
   "-jit-" + Triple.getTriple(),
@@ -648,7 +659,9 @@
 
   // We assume visibility of the whole program if every input file was bitcode.
   auto Features = getTargetFeatures(BitcodeInputFiles);
-  auto LTOBackend = Args.hasArg(OPT_embed_bitcode)
+  auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
+Args.hasArg(OPT_builtin_bitcode_EQ) ||
+Args.hasArg(OPT_clang_backend)
 ? createLTO(Args, Features, OutputBitcode)
 : createLTO(Args, Features);
 
@@ -757,8 +770,12 @@
 return Error::success();
   }
 
-  // Append the new inputs to the

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-08 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 529557.
jhuber6 added a comment.

Add a better test to show that attributes are replaced and default attributes 
are added.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

Files:
  clang/include/clang/CodeGen/CodeGenAction.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/test/CodeGen/link-bitcode-file.c
  clang/test/CodeGen/link-builtin-bitcode.c

Index: clang/test/CodeGen/link-builtin-bitcode.c
===
--- /dev/null
+++ clang/test/CodeGen/link-builtin-bitcode.c
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
+// RUN:   -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
+
+#ifdef BITCODE
+int foo(void) { return 42; }
+int x = 12;
+#endif
+
+extern int foo(void);
+extern int x;
+
+int bar() { return foo() + x; }
+//.
+// CHECK: @x = internal addrspace(1) global i32 12, align 4
+//.
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define dso_local i32 @bar
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:[[CALL:%.*]] = call i32 @foo()
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4
+// CHECK-NEXT:[[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]]
+// CHECK-NEXT:ret i32 [[ADD]]
+//
+//
+// CHECK: Function Attrs: convergent noinline nounwind optnone
+// CHECK-LABEL: define internal i32 @foo
+// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:ret i32 42
+//
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+//.
Index: clang/test/CodeGen/link-bitcode-file.c
===
--- clang/test/CodeGen/link-bitcode-file.c
+++ clang/test/CodeGen/link-bitcode-file.c
@@ -11,6 +11,14 @@
 // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \
 // RUN:-emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
 
+// Make sure we can perform the same options if the input is LLVM-IR
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm-bc -o %t-in.bc %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \
+// RUN: -O3 -emit-llvm -o - %t-in.bc | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \
+// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %t-in.bc \
+// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+
 int f(void);
 
 #ifdef BITCODE
Index: clang/lib/CodeGen/CodeGenAction.cpp
===
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,7 @@
 //===--===//
 
 #include "clang/CodeGen/CodeGenAction.h"
+#include "CGCall.h"
 #include "CodeGenModule.h"
 #include "CoverageMappingGen.h"
 #include "MacroPPCallbacks.h"
@@ -262,7 +263,7 @@
 }
 
 // Links each entry in LinkModules into our module.  Returns true on error.
-bool LinkInModules() {
+bool LinkInModules(llvm::Module *M = nullptr) {
   for (auto &LM : LinkModules) {
 assert(LM.Module && "LinkModule does not actually have a module");
 if (LM.PropagateAttrs)
@@ -271,8 +272,8 @@
 // in LLVM IR.
 if (F.isIntrinsic())
   continue;
-Gen->CGM().mergeDefaultFunctionDefinitionAttribute

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 529490.
jhuber6 added a comment.

Updating, in order to do this starting with bitcode I had to expose a helper
that performs this operation using the options directly rather than through the
`CodeGenModule`. This should keep the existing interfaces intact by shuttling
them through a new internal function while also letting me write this standalone
function that can be used without a `CodeGenModule`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

Files:
  clang/include/clang/CodeGen/CodeGenAction.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGCall.h
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/test/CodeGen/link-bitcode-file.c

Index: clang/test/CodeGen/link-bitcode-file.c
===
--- clang/test/CodeGen/link-bitcode-file.c
+++ clang/test/CodeGen/link-bitcode-file.c
@@ -11,6 +11,14 @@
 // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \
 // RUN:-emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
 
+// Make sure we can perform the same options if the input is LLVM-IR
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm-bc -o %t-in.bc %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \
+// RUN: -O3 -emit-llvm -o - %t-in.bc | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \
+// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %t-in.bc \
+// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+
 int f(void);
 
 #ifdef BITCODE
Index: clang/lib/CodeGen/CodeGenAction.cpp
===
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,7 @@
 //===--===//
 
 #include "clang/CodeGen/CodeGenAction.h"
+#include "CGCall.h"
 #include "CodeGenModule.h"
 #include "CoverageMappingGen.h"
 #include "MacroPPCallbacks.h"
@@ -262,7 +263,7 @@
 }
 
 // Links each entry in LinkModules into our module.  Returns true on error.
-bool LinkInModules() {
+bool LinkInModules(llvm::Module *M = nullptr) {
   for (auto &LM : LinkModules) {
 assert(LM.Module && "LinkModule does not actually have a module");
 if (LM.PropagateAttrs)
@@ -271,8 +272,8 @@
 // in LLVM IR.
 if (F.isIntrinsic())
   continue;
-Gen->CGM().mergeDefaultFunctionDefinitionAttributes(F,
-LM.Internalize);
+CodeGen::mergeDefaultFunctionDefinitionAttributes(
+F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize);
   }
 
 CurLinkModule = LM.Module.get();
@@ -280,14 +281,14 @@
 bool Err;
 if (LM.Internalize) {
   Err = Linker::linkModules(
-  *getModule(), std::move(LM.Module), LM.LinkFlags,
+  M ? *M : *getModule(), std::move(LM.Module), LM.LinkFlags,
   [](llvm::Module &M, const llvm::StringSet<> &GVS) {
 internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
   return !GV.hasName() || (GVS.count(GV.getName()) == 0);
 });
   });
 } else {
-  Err = Linker::linkModules(*getModule(), std::move(LM.Module),
+  Err = Linker::linkModules(M ? *M : *getModule(), std::move(LM.Module),
 LM.LinkFlags);
 }
 
@@ -993,6 +994,36 @@
 delete VMContext;
 }
 
+bool CodeGenAction::loadLinkModules(CompilerInstance &CI) {
+  if (!LinkModules.empty())
+return false;
+
+  for (const CodeGenOptions::BitcodeFileToLink &F :
+   CI.getCodeGenOpts().LinkBitcodeFiles) {
+auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
+if (!BCBuf) {
+  CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << BCBuf.getError().message();
+  LinkModules.clear();
+  return true;
+}
+
+Expected> ModuleOrErr =
+getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
+if (!ModuleOrErr) {
+  handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+CI.getDiagnostics().Report(diag::err_cannot_open_file)
+<< F.Filename << EIB.message();
+  });
+  LinkModules.clear();
+  return true;
+}
+LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+   F.Internalize, F.LinkFlags});
+  }
+  return false;
+}
+
 bool CodeGenAction::hasIRSupport() const { return true; }
 
 void CodeGenAction::EndSourceFileAction() {
@@ -1050,30 +1081,8 @@
   VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers);
 
   // Load bitcode modules to link with

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rGdc81d2a4d5b3: [OpenMP] Fix using the target ID when using 
the new driver (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D150998?vs=523924&id=529476#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150998/new/

https://reviews.llvm.org/D150998

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -62,3 +62,7 @@
 // RUN:   --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode 
-fopenmp-new-driver %s  2>&1 | \
 // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB
 // CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" 
{{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+
+// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp 
--offload-arch=gfx90a:sramecc-:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
+// CHECK-TARGET-ID: 
clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8403,7 +8403,7 @@
 C.getArgsForToolChain(TC, OffloadAction->getOffloadingArch(),
   OffloadAction->getOffloadingDeviceKind());
 StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(Input));
-StringRef Arch = (OffloadAction->getOffloadingArch())
+StringRef Arch = OffloadAction->getOffloadingArch()
  ? OffloadAction->getOffloadingArch()
  : TCArgs.getLastArgValue(options::OPT_march_EQ);
 StringRef Kind =
@@ -8416,14 +8416,24 @@
 llvm::copy_if(Features, std::back_inserter(FeatureArgs),
   [](StringRef Arg) { return !Arg.startswith("-target"); });
 
+if (TC->getTriple().isAMDGPU()) {
+  for (StringRef Feature : llvm::split(Arch.split(':').second, ':')) {
+FeatureArgs.emplace_back(
+Args.MakeArgString(Feature.take_back() + Feature.drop_back()));
+  }
+}
+
+// TODO: We need to pass in the full target-id and handle it properly in 
the
+// linker wrapper.
 SmallVector Parts{
 "file=" + File.str(),
 "triple=" + TC->getTripleString(),
-"arch=" + Arch.str(),
+"arch=" + getProcessorFromTargetID(TC->getTriple(), Arch).str(),
 "kind=" + Kind.str(),
 };
 
-if (TC->getDriver().isUsingLTO(/* IsOffload */ true))
+if (TC->getDriver().isUsingLTO(/* IsOffload */ true) ||
+TC->getTriple().isAMDGPU())
   for (StringRef Feature : FeatureArgs)
 Parts.emplace_back("feature=" + Feature.str());
 


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -62,3 +62,7 @@
 // RUN:   --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s  2>&1 | \
 // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB
 // CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+
+// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
+// CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8403,7 +8403,7 @@
 C.getArgsForToolChain(TC, OffloadAction->getOffloadingArch(),
   OffloadAction->getOffloadingDeviceKind());
 StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(Input));
-StringRef Arch = (OffloadAction->getOffloadingArch())
+StringRef Arch = OffloadAction->getOffloadingArch()
  ? OffloadAction->getOffloadingArch()
  : TCArgs.getLastArgValue(options::OPT_march_EQ);
 StringRef Kind =
@@ -8416,14 +8416,24 @@
 llvm::copy_if(Features, std::back_inserter(FeatureArgs),
   [](StringRef Arg) { return !Arg.startswith("-target")

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

Scratch that, `ASTContext::getFunctionFeatureMap` is used. I wonder if we could 
forgo that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

Just realized this is probably going to be a bit more painful. The attribute 
propagation pass requires a `CodeGenModule` which isn't built without an 
`ASTContext` so it's not available here. Nothing those functions do explicitly 
requires the full `CGM`, they only use the options and the target info. So it's 
possible to make these functions more generic using only the `CompilerInstance` 
instead, but it'll be a lot noisier.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D152391#4404106 , @tra wrote:

>> clang in.bc -Xclang -mlink-builtin-bitcode -Xclang libdevice.10.bc
>
> If that's something we intend to expose to the user, should we consider 
> promoting it to a top-level driver option?

I'm not sure, this probably wouldn't be relevant for non-compiler developers so 
I'm fine keeping it as an `-Xclang` workaround.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152391/new/

https://reviews.llvm.org/D152391

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D152391: [Clang] Allow bitcode linking when the input is LLVM-IR

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: tra, yaxunl, jdoerfert, tianshilei1992, MaskRay, 
JonChesterfield, phosek.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Clang provides the `-mlink-bitcode-file` and `-mlink-builtin-bitcode`
options to insert LLVM-IR into the current TU. These are useful
primarily for including LLVM-IR files that require special handling to
be correct and cannot be linked normally, such as GPU vendor libraries
like `libdevice.10.bc`. Currently these options can only be used if the
source input goes through the AST consumer path. This patch makes the
changes necessary to also support this when the input is LLVM-IR. This
will allow the following operation:

  clang in.bc -Xclang -mlink-builtin-bitcode -Xclang libdevice.10.bc


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D152391

Files:
  clang/include/clang/CodeGen/CodeGenAction.h
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/test/CodeGen/link-bitcode-file.c

Index: clang/test/CodeGen/link-bitcode-file.c
===
--- clang/test/CodeGen/link-bitcode-file.c
+++ clang/test/CodeGen/link-bitcode-file.c
@@ -11,6 +11,14 @@
 // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \
 // RUN:-emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
 
+// Make sure we can perform the same options if the input is LLVM-IR
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm-bc -o %t-in.bc %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \
+// RUN: -O3 -emit-llvm -o - %t-in.bc | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \
+// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %t-in.bc \
+// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+
 int f(void);
 
 #ifdef BITCODE
Index: clang/lib/CodeGen/CodeGenAction.cpp
===
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -262,7 +262,7 @@
 }
 
 // Links each entry in LinkModules into our module.  Returns true on error.
-bool LinkInModules() {
+bool LinkInModules(llvm::Module *M = nullptr) {
   for (auto &LM : LinkModules) {
 assert(LM.Module && "LinkModule does not actually have a module");
 if (LM.PropagateAttrs)
@@ -280,14 +280,14 @@
 bool Err;
 if (LM.Internalize) {
   Err = Linker::linkModules(
-  *getModule(), std::move(LM.Module), LM.LinkFlags,
+  M ? *M : *getModule(), std::move(LM.Module), LM.LinkFlags,
   [](llvm::Module &M, const llvm::StringSet<> &GVS) {
 internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
   return !GV.hasName() || (GVS.count(GV.getName()) == 0);
 });
   });
 } else {
-  Err = Linker::linkModules(*getModule(), std::move(LM.Module),
+  Err = Linker::linkModules(M ? *M : *getModule(), std::move(LM.Module),
 LM.LinkFlags);
 }
 
@@ -993,6 +993,36 @@
 delete VMContext;
 }
 
+bool CodeGenAction::loadLinkModules(CompilerInstance &CI) {
+  if (!LinkModules.empty())
+return false;
+
+  for (const CodeGenOptions::BitcodeFileToLink &F :
+   CI.getCodeGenOpts().LinkBitcodeFiles) {
+auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
+if (!BCBuf) {
+  CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << BCBuf.getError().message();
+  LinkModules.clear();
+  return true;
+}
+
+Expected> ModuleOrErr =
+getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
+if (!ModuleOrErr) {
+  handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+CI.getDiagnostics().Report(diag::err_cannot_open_file)
+<< F.Filename << EIB.message();
+  });
+  LinkModules.clear();
+  return true;
+}
+LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+   F.Internalize, F.LinkFlags});
+  }
+  return false;
+}
+
 bool CodeGenAction::hasIRSupport() const { return true; }
 
 void CodeGenAction::EndSourceFileAction() {
@@ -1050,30 +1080,8 @@
   VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers);
 
   // Load bitcode modules to link with, if we need to.
-  if (LinkModules.empty())
-for (const CodeGenOptions::BitcodeFileToLink &F :
- CI.getCodeGenOpts().LinkBitcodeFiles) {
-  auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-  if (!BCBuf) {
-CI.getDiagnostics().Report(diag::err_cannot_open_file)
-<< F.Filename << BCBuf.getError().message();
-LinkModules.clear();
-

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D150998#4403444 , @yaxunl wrote:

> what happens to
>
>   clang -xhip a.c --offload-arch=gfx90a:xnack+ --offload-arch=gfx90a:xnack- 
> --offload-new-driver -fgpu-rdc
>   clang -xhip b.c --offload-arch=gfx90a:xnack+ --offload-arch=gfx90a:xnack- 
> --offload-new-driver -fgpu-rdc
>   clang --offload-link a.o b.o
>
> Basically gfx90a:xnack+ and gfx90a:xnack- need to be treated as distinct GPU 
> arch's and the fat binary should contain different code objects for them.

In this logic they would both map to `gfx90a` but be given distinct images and 
then be linked together into a single one with `gfx90a` metadata. This is the 
part that I'm saying should probably be a follow-up fix to handle this 
correctly in the linker wrapper.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150998/new/

https://reviews.llvm.org/D150998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D150998#4403401 , @yaxunl wrote:

> In D150998#4403359 , @jhuber6 wrote:
>
>> Can we use this approach for now and land this? It makes the "new driver" 
>> less broken than it currently is as it supports target ID compilation in the 
>> general term. Fixing the merging rules will be a rather large overhaul so 
>> I'd like this to work in the meantime.
>>
>> This patch allows `--offload-arch=gfx90a:xnack+` to work. It does not fix if 
>> the user links in a library that has `--offload-arch=gfx90a:xnack-` as well.
>
> can we add a test to make sure  `--offload-arch=gfx90a:xnack+`  and 
> `--offload-arch=gfx90a:xnack-` work together? It is a very common use case 
> for HIP.

With the current patch, they would both be linked together and it would 
probably set the `xnack` value to the last one that showed up in the link list. 
E.g.

  clang -xhip a.c --offload-arch=gfx90a:xnack+ --offload-new-driver -fgpu-rdc
  clang -xhip b.c --offload-arch=gfx90a:xnack- --offload-new-driver -fgpu-rdc
  clang --offload-link a.o b.o

Would result in a.o and b.o getting linked together with `xnack-` set as the 
backend attribute.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150998/new/

https://reviews.llvm.org/D150998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-06-07 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

Can we use this approach for now and land this? It makes the "new driver" less 
broken than it currently is as it supports target ID compilation in the general 
case. Fixing the merging rules will be a rather large overhaul so I'd like this 
to work in the meantime.

This patch allows `--offload-arch=gfx90a:xnack+` to work. It does not fix the 
case where the user links in a library that has `--offload-arch=gfx90a:xnack-` 
as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150998/new/

https://reviews.llvm.org/D150998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151839: [LinkerWrapper] Fix static library symbol resolution

2023-06-01 Thread Joseph Huber via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rGa26bd95325f1: [LinkerWrapper] Fix static library symbol 
resolution (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151839/new/

https://reviews.llvm.org/D151839

Files:
  clang/test/Driver/linker-wrapper-libs.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1163,20 +1163,21 @@
 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
 /// extract any symbols from it.
 Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
- StringSaver &Saver,
+ bool IsArchive, StringSaver &Saver,
  DenseMap &Syms) {
   Expected IRSymtabOrErr = readIRSymtab(Buffer);
   if (!IRSymtabOrErr)
 return IRSymtabOrErr.takeError();
 
-  bool ShouldExtract = false;
+  bool ShouldExtract = !IsArchive;
+  DenseMap TmpSyms;
   for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
 for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
   if (Sym.isFormatSpecific() || !Sym.isGlobal())
 continue;
 
   bool NewSymbol = Syms.count(Sym.getName()) == 0;
-  auto &OldSym = Syms[Saver.save(Sym.getName())];
+  auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()];
 
   // We will extract if it defines a currenlty undefined non-weak symbol.
   bool ResolvesStrongReference =
@@ -1192,23 +1193,31 @@
 
   // Update this symbol in the "table" with the new information.
   if (OldSym & Sym_Undefined && !Sym.isUndefined())
-OldSym = static_cast(OldSym & ~Sym_Undefined);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym & ~Sym_Undefined);
   if (Sym.isUndefined() && NewSymbol)
-OldSym = static_cast(OldSym | Sym_Undefined);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym | Sym_Undefined);
   if (Sym.isWeak())
-OldSym = static_cast(OldSym | Sym_Weak);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym | Sym_Weak);
 }
   }
 
+  // If the file gets extracted we update the table with the new symbols.
+  if (ShouldExtract)
+Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
+
   return ShouldExtract;
 }
 
 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
 /// any symbols from it.
 Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
-StringSaver &Saver,
+bool IsArchive, StringSaver &Saver,
 DenseMap &Syms) {
-  bool ShouldExtract = false;
+  bool ShouldExtract = !IsArchive;
+  DenseMap TmpSyms;
   for (SymbolRef Sym : Obj.symbols()) {
 auto FlagsOrErr = Sym.getFlags();
 if (!FlagsOrErr)
@@ -1223,7 +1232,7 @@
   return NameOrErr.takeError();
 
 bool NewSymbol = Syms.count(*NameOrErr) == 0;
-auto &OldSym = Syms[Saver.save(*NameOrErr)];
+auto OldSym = NewSymbol ? Sym_None : Syms[*NameOrErr];
 
 // We will extract if it defines a currenlty undefined non-weak symbol.
 bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
@@ -1240,12 +1249,19 @@
 
 // Update this symbol in the "table" with the new information.
 if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined))
-  OldSym = static_cast(OldSym & ~Sym_Undefined);
+  TmpSyms[Saver.save(*NameOrErr)] =
+  static_cast(OldSym & ~Sym_Undefined);
 if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol)
-  OldSym = static_cast(OldSym | Sym_Undefined);
+  TmpSyms[Saver.save(*NameOrErr)] =
+  static_cast(OldSym | Sym_Undefined);
 if (*FlagsOrErr & SymbolRef::SF_Weak)
-  OldSym = static_cast(OldSym | Sym_Weak);
+  TmpSyms[Saver.save(*NameOrErr)] = static_cast(OldSym | Sym_Weak);
   }
+
+  // If the file gets extracted we update the table with the new symbols.
+  if (ShouldExtract)
+Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
+
   return ShouldExtract;
 }
 
@@ -1255,18 +1271,19 @@
 ///   1) It defines an undefined symbol in a regular object filie.
 ///   2) It defines a global symbol without hidden visibility that has not
 ///  yet been defined.
-Expected getSymbols(StringRef Image, OffloadKind Kind, StringSaver &Saver,
+Expected getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive,
+  StringSaver &Saver,
   DenseMap &Syms) {
   MemoryBufferRef Buffer = MemoryBufferRef(Image, "");
   switch (identify_magic(Image)) {
   case file_magic:

[PATCH] D151839: [LinkerWrapper] Fix static library symbol resolution

2023-05-31 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/test/Driver/linker-wrapper-libs.c:27
 //
 // Check that we extract a static library defining an undefined symbol.
 //

tra wrote:
> jhuber6 wrote:
> > tra wrote:
> > > How does this test test the functionality of the undefined symbol? E.g. 
> > > how does it fail now, before the patch?
> > > 
> > > Is there an explicit check we could to do to make sure things work as 
> > > intended as opposed to "there's no obvious error" which may also mean "we 
> > > forgot to process *undefined.bc".
> > Yeah, I wasn't sure how to define a good test for this. The problem I 
> > encountered before making this patch was that having another file that used 
> > an undefined symbol would override the `NewSymbol` check and then would 
> > prevent it from being extracted. So this checks that case.
> AFAICT, with -DUNDEFINED, the file would have only `extern int sym;`. CE says 
> suggests that it produces an embty bitcode file: 
> https://godbolt.org/z/EY9a8Pfeb
> 
> What exactly is supposed to be in the `*.undefined.bc` ?  If it's intended to 
> have an undefined reference to `sym` you need to add some sort of a reference 
> to it. 
> 
Good catch, forgot about that. It's why the other use of `extern sym` returns 
it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151839/new/

https://reviews.llvm.org/D151839

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151839: [LinkerWrapper] Fix static library symbol resolution

2023-05-31 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 527240.
jhuber6 added a comment.

Fix test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151839/new/

https://reviews.llvm.org/D151839

Files:
  clang/test/Driver/linker-wrapper-libs.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1163,20 +1163,21 @@
 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
 /// extract any symbols from it.
 Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
- StringSaver &Saver,
+ bool IsArchive, StringSaver &Saver,
  DenseMap &Syms) {
   Expected IRSymtabOrErr = readIRSymtab(Buffer);
   if (!IRSymtabOrErr)
 return IRSymtabOrErr.takeError();
 
-  bool ShouldExtract = false;
+  bool ShouldExtract = !IsArchive;
+  DenseMap TmpSyms;
   for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
 for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
   if (Sym.isFormatSpecific() || !Sym.isGlobal())
 continue;
 
   bool NewSymbol = Syms.count(Sym.getName()) == 0;
-  auto &OldSym = Syms[Saver.save(Sym.getName())];
+  auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()];
 
   // We will extract if it defines a currenlty undefined non-weak symbol.
   bool ResolvesStrongReference =
@@ -1192,23 +1193,31 @@
 
   // Update this symbol in the "table" with the new information.
   if (OldSym & Sym_Undefined && !Sym.isUndefined())
-OldSym = static_cast(OldSym & ~Sym_Undefined);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym & ~Sym_Undefined);
   if (Sym.isUndefined() && NewSymbol)
-OldSym = static_cast(OldSym | Sym_Undefined);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym | Sym_Undefined);
   if (Sym.isWeak())
-OldSym = static_cast(OldSym | Sym_Weak);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym | Sym_Weak);
 }
   }
 
+  // If the file gets extracted we update the table with the new symbols.
+  if (ShouldExtract)
+Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
+
   return ShouldExtract;
 }
 
 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
 /// any symbols from it.
 Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
-StringSaver &Saver,
+bool IsArchive, StringSaver &Saver,
 DenseMap &Syms) {
-  bool ShouldExtract = false;
+  bool ShouldExtract = !IsArchive;
+  DenseMap TmpSyms;
   for (SymbolRef Sym : Obj.symbols()) {
 auto FlagsOrErr = Sym.getFlags();
 if (!FlagsOrErr)
@@ -1223,7 +1232,7 @@
   return NameOrErr.takeError();
 
 bool NewSymbol = Syms.count(*NameOrErr) == 0;
-auto &OldSym = Syms[Saver.save(*NameOrErr)];
+auto OldSym = NewSymbol ? Sym_None : Syms[*NameOrErr];
 
 // We will extract if it defines a currenlty undefined non-weak symbol.
 bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
@@ -1240,12 +1249,19 @@
 
 // Update this symbol in the "table" with the new information.
 if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined))
-  OldSym = static_cast(OldSym & ~Sym_Undefined);
+  TmpSyms[Saver.save(*NameOrErr)] =
+  static_cast(OldSym & ~Sym_Undefined);
 if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol)
-  OldSym = static_cast(OldSym | Sym_Undefined);
+  TmpSyms[Saver.save(*NameOrErr)] =
+  static_cast(OldSym | Sym_Undefined);
 if (*FlagsOrErr & SymbolRef::SF_Weak)
-  OldSym = static_cast(OldSym | Sym_Weak);
+  TmpSyms[Saver.save(*NameOrErr)] = static_cast(OldSym | Sym_Weak);
   }
+
+  // If the file gets extracted we update the table with the new symbols.
+  if (ShouldExtract)
+Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
+
   return ShouldExtract;
 }
 
@@ -1255,18 +1271,19 @@
 ///   1) It defines an undefined symbol in a regular object filie.
 ///   2) It defines a global symbol without hidden visibility that has not
 ///  yet been defined.
-Expected getSymbols(StringRef Image, OffloadKind Kind, StringSaver &Saver,
+Expected getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive,
+  StringSaver &Saver,
   DenseMap &Syms) {
   MemoryBufferRef Buffer = MemoryBufferRef(Image, "");
   switch (identify_magic(Image)) {
   case file_magic::bitcode:
-return getSymbolsFromBitcode(Buffer, Kind, Saver, Syms);
+return getSymbolsFromBitc

[PATCH] D151839: [LinkerWrapper] Fix static library symbol resolution

2023-05-31 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/test/Driver/linker-wrapper-libs.c:27
 //
 // Check that we extract a static library defining an undefined symbol.
 //

tra wrote:
> How does this test test the functionality of the undefined symbol? E.g. how 
> does it fail now, before the patch?
> 
> Is there an explicit check we could to do to make sure things work as 
> intended as opposed to "there's no obvious error" which may also mean "we 
> forgot to process *undefined.bc".
Yeah, I wasn't sure how to define a good test for this. The problem I 
encountered before making this patch was that having another file that used an 
undefined symbol would override the `NewSymbol` check and then would prevent it 
from being extracted. So this checks that case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151839/new/

https://reviews.llvm.org/D151839

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151839: [LinkerWrapper] Fix static library symbol resolution

2023-05-31 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, tianshilei1992, JonChesterfield, tra, 
yaxunl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1.
Herald added a project: clang.

The linker wrapper performs its own very basic symbol resolution for the
purpose of supporting standard static library semantics. We do this here
because the Nvidia `nvlink` wrapper does not support static linking and
we have some offloading specific extensions.

Currently, we always place symbols in the "table" even if they aren't
extracted. This caused the logic to fail when many files were used that
referenced the same undefined variable. This patch changes the pass to
only add the symbols to the global "table" if the file is actually
extracted.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D151839

Files:
  clang/test/Driver/linker-wrapper-libs.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1163,20 +1163,21 @@
 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
 /// extract any symbols from it.
 Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
- StringSaver &Saver,
+ bool IsArchive, StringSaver &Saver,
  DenseMap &Syms) {
   Expected IRSymtabOrErr = readIRSymtab(Buffer);
   if (!IRSymtabOrErr)
 return IRSymtabOrErr.takeError();
 
-  bool ShouldExtract = false;
+  bool ShouldExtract = !IsArchive;
+  DenseMap TmpSyms;
   for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
 for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
   if (Sym.isFormatSpecific() || !Sym.isGlobal())
 continue;
 
   bool NewSymbol = Syms.count(Sym.getName()) == 0;
-  auto &OldSym = Syms[Saver.save(Sym.getName())];
+  auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()];
 
   // We will extract if it defines a currenlty undefined non-weak symbol.
   bool ResolvesStrongReference =
@@ -1192,23 +1193,31 @@
 
   // Update this symbol in the "table" with the new information.
   if (OldSym & Sym_Undefined && !Sym.isUndefined())
-OldSym = static_cast(OldSym & ~Sym_Undefined);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym & ~Sym_Undefined);
   if (Sym.isUndefined() && NewSymbol)
-OldSym = static_cast(OldSym | Sym_Undefined);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym | Sym_Undefined);
   if (Sym.isWeak())
-OldSym = static_cast(OldSym | Sym_Weak);
+TmpSyms[Saver.save(Sym.getName())] =
+static_cast(OldSym | Sym_Weak);
 }
   }
 
+  // If the file gets extracted we update the table with the new symbols.
+  if (ShouldExtract)
+Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
+
   return ShouldExtract;
 }
 
 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
 /// any symbols from it.
 Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
-StringSaver &Saver,
+bool IsArchive, StringSaver &Saver,
 DenseMap &Syms) {
-  bool ShouldExtract = false;
+  bool ShouldExtract = !IsArchive;
+  DenseMap TmpSyms;
   for (SymbolRef Sym : Obj.symbols()) {
 auto FlagsOrErr = Sym.getFlags();
 if (!FlagsOrErr)
@@ -1223,7 +1232,7 @@
   return NameOrErr.takeError();
 
 bool NewSymbol = Syms.count(*NameOrErr) == 0;
-auto &OldSym = Syms[Saver.save(*NameOrErr)];
+auto OldSym = NewSymbol ? Sym_None : Syms[*NameOrErr];
 
 // We will extract if it defines a currenlty undefined non-weak symbol.
 bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
@@ -1240,12 +1249,19 @@
 
 // Update this symbol in the "table" with the new information.
 if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined))
-  OldSym = static_cast(OldSym & ~Sym_Undefined);
+  TmpSyms[Saver.save(*NameOrErr)] =
+  static_cast(OldSym & ~Sym_Undefined);
 if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol)
-  OldSym = static_cast(OldSym | Sym_Undefined);
+  TmpSyms[Saver.save(*NameOrErr)] =
+  static_cast(OldSym | Sym_Undefined);
 if (*FlagsOrErr & SymbolRef::SF_Weak)
-  OldSym = static_cast(OldSym | Sym_Weak);
+  TmpSyms[Saver.save(*NameOrErr)] = static_cast(OldSym | Sym_Weak);
   }
+
+  // If the file gets extracted we update the table with the new symbols.
+  if (ShouldExtract)
+Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-05-23 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:8475
 "triple=" + TC->getTripleString(),
-"arch=" + Arch.str(),
+"arch=" + getProcessorFromTargetID(TC->getTriple(), Arch).str(),
 "kind=" + Kind.str(),

yaxunl wrote:
> jhuber6 wrote:
> > saiislam wrote:
> > > Shouldn't Arch (targetID here) should be passed along instead of just the 
> > > processor?
> > > 
> > > For example, `gfx90a:xnack+` and `gfx90a:xnack-` should be treated 
> > > differently.
> > So the problem there is that this will cause us to no longer link in 
> > something like the OpenMP runtime library since `gfx90a` != 
> > `gfx90a:xnack+`. Right now the behavior is that we will link them both 
> > together since the architecture matches but then the attributes will get 
> > resolved the same way we handle `-mattr=+x,-x`. I'm not sure what the 
> > expected behaviour is here.
> targetID is part of ROCm ABI as it is returned as part of Isa::GetIsaName 
> (https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/rocm-5.5.x/src/core/runtime/isa.cpp#L98)
>  . 
> 
> the compatibility rule for targetID is specified by 
> https://clang.llvm.org/docs/ClangOffloadBundler.html#target-id . For example, 
> bundle entry with gfx90a can be consumed by device with GetIsaName 
> gfx90a:xnack+ or gfx90a:xnack- . but bundle entry with gfx90a:xnack+ can only 
> be consumed by device with GetIsaName gfx90a:xnack+.
> 
> Language runtime is supposed to do a compatibility check for bundle entry 
> with the device GetIsaName. Isa::IsCompatible 
> (https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/3b939c398bdac0c2b9a860ff9a0ed0be0c80f911/src/core/runtime/isa.cpp#L73)
>  can be used to do that. For convenience, language runtime is expected to use 
> targetID for identifying bundle entries instead of re-construct targetID from 
> features when needed.
> 
> targetID is also used for compatibility checks when linking bitcode.
> 
So what we need is some more sophisticated logic in the linker wrapper to merge 
the binaries according to these rules. However the handling will definitely 
require pulling this apart when we send it to LTO.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150998/new/

https://reviews.llvm.org/D150998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

We should at least be able to `reinterpret_cast` between cases we know are 
compatible, as the OpenCL check does. One of the problems with the numerical 
address space is it doesn't have any information to know if that's strictly 
legal or not. I'm not sure if casting away an address space is always legal, 
but it's generally what we do in LLVM-IR.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151087#4361367 , @ebevhan wrote:

> I don't think the standard argument really holds. It doesn't mention 
> restrictions on address spaces because it doesn't have to, since they don't 
> exist in C++. If they did, I'm pretty sure that reinterpret_cast would 
> disallow arbitrary address space-modifying casts, since they wouldn't 
> necessarily be bitcasts.

The reason cited for this change was that the standard does not allow 
`reinterpret_cast` to drop qualifiers. I don't think that's true, so we're free 
to define our own behavior. Whether or not this is safe or desirable is up to 
us, and I'm arguing that for C++ it should be permitted.

> Like @arsenm said, any behaviors that you're using or observing regarding 
> conversion of target address spaces in both C and C++ are purely 
> coincidental. I don't think it's a great idea to add more such coincidental 
> behaviors. The result will be that future code will become dependent on these 
> arbitrary, less restrictive behaviors, and it will be much harder to properly 
> define sensible semantics later on.

The behavior isn't coincidental, it does exactly what you expect if you were to 
use the same address space in LLVM-IR. The problem is that there are no 
semantic checks on them so if you use them incorrectly it will break. I feel 
like this is a separate issue and I don't know why it would prevent us from 
doing *any* kind of address space cast in C++.  There is no alternative in C++ 
and we already permit this with C-style casts, we cannot use OpenCL extensions 
like `addrspace_cast` so that leaves us unable to use C++ to write programs we 
want to write. We already use numerical address spaces in the OpenMP GPU 
runtime, and internally there we need to use C to do all the address space 
casts because of this.

There would be definite value in using the target's information to map address 
spaces to the known OpenCL ones so we could share some of their checks, but I 
feel like that's a separate issue.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151087#4361237 , @ebevhan wrote:

> Clang hasn't needed to formalize all of the address space behavior because 
> it's managed to piggyback off of the language semantics provided by OpenCL, 
> and no targets really have had a need for it. Once you start looking at the 
> target AS stuff on its own, you realize it's not really that well defined, 
> and making it even less defined by allowing arbitrary conversions isn't the 
> solution.

I'd rather have an operation whose semantics are a little dangerous than 
something that doesn't work at all. As it stands we need to use C-style casts 
for this and I don't think there's a good reason to forbid this at least from 
the C++ standard point of view. For OpenCL where we have the concept of address 
spaces it makes sense, but not for C++.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151087#4361081 , @arsenm wrote:

> They bypass all semantic checks. For example if you declare something as 
> address space 4, it will let you write to it unlike __constant__. It will let 
> you place stack objects in globals and don't interact correctly with address 
> space typed pointers in builtins (and any other context where you would want 
> to interact with the matching language addrspace). Don't think sizeof works 
> as you would expect either. Whatever is happening is not by design

I'm not sure how much work it would be to line up the numbers with the 
equivalent OpenCL type using the target info, or if that would break anything. 
However, I think this is out of scope, because even if we did have such 
semantic checks we'd still want to be able to `reinterpret_cast` on them in C++ 
which currently isn't allowed.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151098: [Clang][Docs] Add help test to `-march` and `-mcpu` to suggest `-mcpu=help`

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGba9590d8bc76: [Clang][Docs] Add help test to `-march` and 
`-mcpu` to suggest `-mcpu=help` (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151098/new/

https://reviews.llvm.org/D151098

Files:
  clang/include/clang/Driver/Options.td


Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3494,7 +3494,8 @@
 def mwatchos_version_min_EQ : Joined<["-"], "mwatchos-version-min=">, 
Group;
 def mwatchos_simulator_version_min_EQ : Joined<["-"], 
"mwatchos-simulator-version-min=">;
 def mwatchsimulator_version_min_EQ : Joined<["-"], 
"mwatchsimulator-version-min=">, Alias;
-def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>;
+def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>,
+  HelpText<"For a list of availible architectures for the target use 
'-mcpu=help'">;
 def masm_EQ : Joined<["-"], "masm=">, Group, Flags<[NoXarchOption]>;
 def inline_asm_EQ : Joined<["-"], "inline-asm=">, Group, 
Flags<[CC1Option]>,
   Values<"att,intel">,
@@ -3518,7 +3519,8 @@
 def mguard_EQ : Joined<["-"], "mguard=">, Group, 
Flags<[NoXarchOption]>,
   HelpText<"Enable or disable Control Flow Guard checks and guard tables 
emission">,
   Values<"none,cf,cf-nochecks">;
-def mcpu_EQ : Joined<["-"], "mcpu=">, Group;
+def mcpu_EQ : Joined<["-"], "mcpu=">, Group, 
+  HelpText<"For a list of availible CPUs for the target use '-mcpu=help'">;
 def mmcu_EQ : Joined<["-"], "mmcu=">, Group;
 def msim : Flag<["-"], "msim">, Group;
 def mdynamic_no_pic : Joined<["-"], "mdynamic-no-pic">, Group;


Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3494,7 +3494,8 @@
 def mwatchos_version_min_EQ : Joined<["-"], "mwatchos-version-min=">, Group;
 def mwatchos_simulator_version_min_EQ : Joined<["-"], "mwatchos-simulator-version-min=">;
 def mwatchsimulator_version_min_EQ : Joined<["-"], "mwatchsimulator-version-min=">, Alias;
-def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>;
+def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>,
+  HelpText<"For a list of availible architectures for the target use '-mcpu=help'">;
 def masm_EQ : Joined<["-"], "masm=">, Group, Flags<[NoXarchOption]>;
 def inline_asm_EQ : Joined<["-"], "inline-asm=">, Group, Flags<[CC1Option]>,
   Values<"att,intel">,
@@ -3518,7 +3519,8 @@
 def mguard_EQ : Joined<["-"], "mguard=">, Group, Flags<[NoXarchOption]>,
   HelpText<"Enable or disable Control Flow Guard checks and guard tables emission">,
   Values<"none,cf,cf-nochecks">;
-def mcpu_EQ : Joined<["-"], "mcpu=">, Group;
+def mcpu_EQ : Joined<["-"], "mcpu=">, Group, 
+  HelpText<"For a list of availible CPUs for the target use '-mcpu=help'">;
 def mmcu_EQ : Joined<["-"], "mmcu=">, Group;
 def msim : Flag<["-"], "msim">, Group;
 def mdynamic_no_pic : Joined<["-"], "mdynamic-no-pic">, Group;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151087#4360919 , @arsenm wrote:

> In D151087#4360695 , @jhuber6 wrote:
>
>> I don't think that's something we can diagnose here with just the address 
>> space number. it would require information from the underlying target for 
>> the expected pointer qualities to the address space.
>
> Yes, this is mandatory. Numbered address spaces are broken and just work by 
> accident and I don't love seeing new code rely on whatever random behavior 
> they have now. Right now they happen to codegen to something that appears to 
> work, while bypassing any kind of sensible semantic checking for what you're 
> doing with them. If you want to really use this, we need to find some way to 
> map numbers back into language address spaces and make them fully aware of 
> the target properties

How are they broken? The expectation is just that they line up with what the 
backend defines them as, which should be a stable target. We could potentially 
use target info to map the numbered address spaces into something sensible. 
E.g. `1 = opencl_global`, `3 = opencl_local`, `5 = opencl_private`. I think 
that's common between NVPTX and AMDGPU. But I still don't think that we should 
be preventing users from even doing wrong things with them if they explicitly 
request it.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151087#4360818 , @ebevhan wrote:

> By "freestanding C++" I assume you mean "C++ without the OpenCL C++ 
> extension" and not "C++ without extensions at all", because in the latter 
> case, you don't have address spaces either.
>
> This is what I meant in D58346  by "There's 
> many places in the codebase where OpenCL flags restrict generic address space 
> behavior." Isn't the solution here to allow addrspace_cast even when OpenCL 
> C++ isn't turned on, as a more generic extension?

You can have address spaces in freestanding C++, they just need to be assigned 
according to the backend, e.g. https://godbolt.org/z/ahazae6Ta. And I don't 
think that's a desirable solution because it would require a language extension 
to cover a use-case that isn't disallowed by the C++ standard anyway. I don't 
see a reason why we shouldn't be able to do this directly.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151098: [Clang][Docs] Add help test to `-march` and `-mcpu` to suggest `-mcpu=help`

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151098#4360750 , @aaron.ballman 
wrote:

> LGTM, though it's a bit jarring that `-march` suggests using `-mcpu=help` 
> (should we ensure that `-march=help` works on all targets instead?)

Using `-march=help` works inadvertently because it will list the targets when 
given an invalid one. We could probably map the `-mcpu=help` handling to 
`-march=help` but I'm not entirely sure what the semantics should be there. The 
distinction between `-march` and `-mcpu` is still not entirely clear to me, 
it's just a matter of whether or not the target decided to choose one or the 
other.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151098/new/

https://reviews.llvm.org/D151098

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151087#4360729 , @ebevhan wrote:

> That's fair. I would like clang to improve and formalize the semantics for 
> generic address space behavior a bit, which was part of the point with D62574 
> .  But there don't seem to be enough people 
> who need something like it to make it happen.
>
> Honestly, looking at the patch again does suggest to me that your use case 
> would be covered. It just wouldn't be done with a reinterpret_cast, but an 
> addrspace_cast. Since every target by default would permit explicit casts 
> with isExplicitAddrSpaceConversionLegal, your desired behavior should work.

The problem is we don't have `addrspace_cast` in freestanding C++, so as it 
stands we currently have no way to perform this operation in C++ which is 
preventing me from implementing things in the LLVM LibC port for GPUs I'm 
working on.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:8465-8470
+if (TC->getTriple().isAMDGPU()) {
+  for (StringRef Feature : llvm::split(Arch.split(':').second, ':')) {
+FeatureArgs.emplace_back(
+Args.MakeArgString(Feature.take_back() + Feature.drop_back()));
+  }
+}

saiislam wrote:
> May be use `parseTargetIDWithFormatCheckingOnly()`?
I tried that but it didn't return the strings in the format required by `llc` 
for the `-mattrs` list.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:8475
 "triple=" + TC->getTripleString(),
-"arch=" + Arch.str(),
+"arch=" + getProcessorFromTargetID(TC->getTriple(), Arch).str(),
 "kind=" + Kind.str(),

saiislam wrote:
> Shouldn't Arch (targetID here) should be passed along instead of just the 
> processor?
> 
> For example, `gfx90a:xnack+` and `gfx90a:xnack-` should be treated 
> differently.
So the problem there is that this will cause us to no longer link in something 
like the OpenMP runtime library since `gfx90a` != `gfx90a:xnack+`. Right now 
the behavior is that we will link them both together since the architecture 
matches but then the attributes will get resolved the same way we handle 
`-mattr=+x,-x`. I'm not sure what the expected behaviour is here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150998/new/

https://reviews.llvm.org/D150998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151087#4360668 , @arsenm wrote:

>> It would most likely invalid, but I'm not asserting that `clang` should be 
>> responsible for diagnosing misuse in these cases. Especially because in 
>> generic freestanding C++ we don't have any language options to suggest the 
>> actual semantics.
>
> If the pointer sizes don’t match I think it has to be rejected since it’s no 
> longer a pure bitcast

I don't think that's something we can diagnose here with just the address space 
number. It would require information from the underlying target about the 
pointer properties expected for the address space.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151098: [Clang][Docs] Add help test to `-march` and `-mcpu` to suggest `-mcpu=help`

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: MaskRay, aaron.ballman.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Currently there is no documentation for these flags, so users might find it
confusing to know which values are permitted. This change simply adds
documentation to suggest using `-mcpu=help` to list the available target
names.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D151098

Files:
  clang/include/clang/Driver/Options.td


Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3494,7 +3494,8 @@
 def mwatchos_version_min_EQ : Joined<["-"], "mwatchos-version-min=">, 
Group;
 def mwatchos_simulator_version_min_EQ : Joined<["-"], 
"mwatchos-simulator-version-min=">;
 def mwatchsimulator_version_min_EQ : Joined<["-"], 
"mwatchsimulator-version-min=">, Alias;
-def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>;
+def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>,
+  HelpText<"For a list of availible architectures for the target use 
'-mcpu=help'">;
 def masm_EQ : Joined<["-"], "masm=">, Group, Flags<[NoXarchOption]>;
 def inline_asm_EQ : Joined<["-"], "inline-asm=">, Group, 
Flags<[CC1Option]>,
   Values<"att,intel">,
@@ -3518,7 +3519,8 @@
 def mguard_EQ : Joined<["-"], "mguard=">, Group, 
Flags<[NoXarchOption]>,
   HelpText<"Enable or disable Control Flow Guard checks and guard tables 
emission">,
   Values<"none,cf,cf-nochecks">;
-def mcpu_EQ : Joined<["-"], "mcpu=">, Group;
+def mcpu_EQ : Joined<["-"], "mcpu=">, Group, 
+  HelpText<"For a list of availible CPUs for the target use '-mcpu=help'">;
 def mmcu_EQ : Joined<["-"], "mmcu=">, Group;
 def msim : Flag<["-"], "msim">, Group;
 def mdynamic_no_pic : Joined<["-"], "mdynamic-no-pic">, Group;


Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3494,7 +3494,8 @@
 def mwatchos_version_min_EQ : Joined<["-"], "mwatchos-version-min=">, Group;
 def mwatchos_simulator_version_min_EQ : Joined<["-"], "mwatchos-simulator-version-min=">;
 def mwatchsimulator_version_min_EQ : Joined<["-"], "mwatchsimulator-version-min=">, Alias;
-def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>;
+def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>,
+  HelpText<"For a list of availible architectures for the target use '-mcpu=help'">;
 def masm_EQ : Joined<["-"], "masm=">, Group, Flags<[NoXarchOption]>;
 def inline_asm_EQ : Joined<["-"], "inline-asm=">, Group, Flags<[CC1Option]>,
   Values<"att,intel">,
@@ -3518,7 +3519,8 @@
 def mguard_EQ : Joined<["-"], "mguard=">, Group, Flags<[NoXarchOption]>,
   HelpText<"Enable or disable Control Flow Guard checks and guard tables emission">,
   Values<"none,cf,cf-nochecks">;
-def mcpu_EQ : Joined<["-"], "mcpu=">, Group;
+def mcpu_EQ : Joined<["-"], "mcpu=">, Group, 
+  HelpText<"For a list of availible CPUs for the target use '-mcpu=help'">;
 def mmcu_EQ : Joined<["-"], "mmcu=">, Group;
 def msim : Flag<["-"], "msim">, Group;
 def mdynamic_no_pic : Joined<["-"], "mdynamic-no-pic">, Group;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D151087#4360577 , @ebevhan wrote:

> What would be the semantics of such an operation if the address spaces are 
> disjoint? Or, if the underlying pointer widths aren't the same?

It would most likely be invalid, but I'm not asserting that `clang` should be 
responsible for diagnosing misuse in these cases. Especially because in generic 
freestanding C++ we don't have any language options to suggest the actual 
semantics.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151087/new/

https://reviews.llvm.org/D151087

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D151087: [Clang] Permit address space casts with 'reinterpret_cast' in C++

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: rjmccall, ebevhan, jdoerfert, JonChesterfield, 
aaron.ballman.
Herald added subscribers: arichardson, Anastasia.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Previously, D58346 changed the rules to not permit address space casts
in C++. The cited reason was that `reinterpret_cast` is not allowed to
remove qualifiers, however the standard only explicitly says that
`const` and `volatile` qualifiers cannot be removed, see
http://eel.is/c++draft/expr.reinterpret.cast#7. The current behaviour is
suboptimal as it means there is no way in C++ to change address spaces
and we need to rely on unsafe C style casts that aren't permitted by
many style guides. This patch changes the handling to only apply to OpenCL.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D151087

Files:
  clang/lib/Sema/SemaCast.cpp
  clang/test/SemaCXX/address-space-conversion.cpp


Index: clang/test/SemaCXX/address-space-conversion.cpp
===
--- clang/test/SemaCXX/address-space-conversion.cpp
+++ clang/test/SemaCXX/address-space-conversion.cpp
@@ -132,23 +132,23 @@
A_ptr ap, A_ptr_1 ap1, A_ptr_2 ap2,
B_ptr bp, B_ptr_1 bp1, B_ptr_2 bp2,
const void __attribute__((address_space(1))) * 
cvp1) {
-  // reinterpret_cast can't be used to cast to a different address space 
unless they are matching (i.e. overlapping).
-  (void)reinterpret_cast(ap1); // expected-error{{reinterpret_cast from 
'A_ptr_1' (aka '__attribute__((address_space(1))) A *') to 'A_ptr' (aka 'A *') 
is not allowed}}
-  (void)reinterpret_cast(ap2); // expected-error{{reinterpret_cast from 
'A_ptr_2' (aka '__attribute__((address_space(2))) A *') to 'A_ptr' (aka 'A *') 
is not allowed}}
+  // reinterpret_cast can be used to cast to a different address space.
+  (void)reinterpret_cast(ap1);
+  (void)reinterpret_cast(ap2);
   (void)reinterpret_cast(bp);
-  (void)reinterpret_cast(bp1); // expected-error{{reinterpret_cast from 
'B_ptr_1' (aka '__attribute__((address_space(1))) B *') to 'A_ptr' (aka 'A *') 
is not allowed}}
-  (void)reinterpret_cast(bp2); // expected-error{{reinterpret_cast from 
'B_ptr_2' (aka '__attribute__((address_space(2))) B *') to 'A_ptr' (aka 'A *') 
is not allowed}}
+  (void)reinterpret_cast(bp1);
+  (void)reinterpret_cast(bp2);
   (void)reinterpret_cast(vp);
-  (void)reinterpret_cast(vp1);   // expected-error{{reinterpret_cast 
from 'void_ptr_1' (aka '__attribute__((address_space(1))) void *') to 'A_ptr' 
(aka 'A *') is not allowed}}
-  (void)reinterpret_cast(vp2);   // expected-error{{reinterpret_cast 
from 'void_ptr_2' (aka '__attribute__((address_space(2))) void *') to 'A_ptr' 
(aka 'A *') is not allowed}}
-  (void)reinterpret_cast(ap);  // expected-error{{reinterpret_cast 
from 'A_ptr' (aka 'A *') to 'A_ptr_1' (aka '__attribute__((address_space(1))) A 
*') is not allowed}}
-  (void)reinterpret_cast(ap2); // expected-error{{reinterpret_cast 
from 'A_ptr_2' (aka '__attribute__((address_space(2))) A *') to 'A_ptr_1' (aka 
'__attribute__((address_space(1))) A *') is not allowed}}
-  (void)reinterpret_cast(bp);  // expected-error{{reinterpret_cast 
from 'B_ptr' (aka 'B *') to 'A_ptr_1' (aka '__attribute__((address_space(1))) A 
*') is not allowed}}
+  (void)reinterpret_cast(vp1);
+  (void)reinterpret_cast(vp2);
+  (void)reinterpret_cast(ap);
+  (void)reinterpret_cast(ap2);
+  (void)reinterpret_cast(bp);
   (void)reinterpret_cast(bp1);
-  (void)reinterpret_cast(bp2); // expected-error{{reinterpret_cast 
from 'B_ptr_2' (aka '__attribute__((address_space(2))) B *') to 'A_ptr_1' (aka 
'__attribute__((address_space(1))) A *') is not allowed}}
-  (void)reinterpret_cast(vp);  // expected-error{{reinterpret_cast 
from 'void_ptr' (aka 'void *') to 'A_ptr_1' (aka 
'__attribute__((address_space(1))) A *') is not allowed}}
+  (void)reinterpret_cast(bp2);
+  (void)reinterpret_cast(vp);
   (void)reinterpret_cast(vp1);
-  (void)reinterpret_cast(vp2); // expected-error{{reinterpret_cast 
from 'void_ptr_2' (aka '__attribute__((address_space(2))) void *') to 'A_ptr_1' 
(aka '__attribute__((address_space(1))) A *') is not allowed}}
+  (void)reinterpret_cast(vp2);
 
   // ... but don't try to cast away constness!
   (void)reinterpret_cast(cvp1); // expected-error{{casts away 
qualifiers}}
Index: clang/lib/Sema/SemaCast.cpp
===
--- clang/lib/Sema/SemaCast.cpp
+++ clang/lib/Sema/SemaCast.cpp
@@ -2481,7 +2481,11 @@
   if (IsAddressSpaceConversion(SrcType, DestType)) {
 Kind = CK_AddressSpaceConversion;
 assert(SrcType->isPointerType() && DestType->isPointerType());
+// C++ 7.6.1.10: The reinterpret_cast operator is only forbidden from
+// casting away constness or vo

[PATCH] D58346: [Sema] Change addr space diagnostics in casts to follow C++ style

2023-05-22 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D58346#4359631 , @ebevhan wrote:

> In D58346#4359291 , @jhuber6 wrote:
>
>> should C++ really be limited by OpenCL here?
>
> It probably shouldn't. There's many places in the codebase where OpenCL flags 
> restrict generic address space behavior. I have a patch at D62574 
>  that attempts to formalize the address 
> space rules a bit more (which I think also gets rid of the OpenCL 
> restrictions you mention) but there's never been any huge interest and I 
> don't have the time to push for it.
>
> Whether that patch actually solves your problem or not, I don't know.

A quick scan on where the error gets printed suggests not, since we'd also like 
the ability to add an address space. My suggestion is to allow 
`reinterpret_cast` to remove and add address spaces for C++; OpenCL will 
remain the same.


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58346/new/

https://reviews.llvm.org/D58346

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D58346: [Sema] Change addr space diagnostics in casts to follow C++ style

2023-05-21 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.
Herald added a subscriber: arichardson.
Herald added a project: All.

Ran into this change trying to decay a qualified pointer to a generic address 
space, e.g. https://godbolt.org/z/3dEd4TxjW. I understand that `addrspace_cast` 
was added to replace this functionality, but this isn't a C++ feature so as it 
stands there's no way to perform this operation in C++ and we need to rely on 
C-style casts to perform this basic functionality. I see it was brought up 
earlier, but should C++ really be limited by OpenCL here? I'm not aware of any 
specific mention of these, `reinterpret_cast` only explicitly disallows 
`volatile` and `const` qualifiers as far as I'm aware. The remaining case can 
be thought of as a compiler extension as `[[clang::address_space(n)]]` has no 
meaning otherwise.


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58346/new/

https://reviews.llvm.org/D58346

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150998: [OpenMP] Fix using the target ID when using the new driver

2023-05-19 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, carlo.bertolli, yaxunl, saiislam.
Herald added subscribers: sunshaoce, kerbowa, guansong, tpr, jvesely.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1, MaskRay.
Herald added a reviewer: jdoerfert.
Herald added a project: clang.

AMDGPU sometimes uses a novel formatting for their offloading
architecture called the target id. This merges the attributes and the
architecture name into a single string. Previously, we were passing this
as the canonical architecture name. This caused the linker wrapper to
fail to find relevant libraries and then pass an invalid CPU name. This
patch changes the handling in the offload packager to handle the
canonical architecture and then extract the features.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D150998

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -62,3 +62,7 @@
 // RUN:   --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode 
-fopenmp-new-driver %s  2>&1 | \
 // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB
 // CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" 
{{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+
+// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp 
--offload-arch=gfx90a:sramecc-:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
+// CHECK-TARGET-ID: 
clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8449,7 +8449,7 @@
 C.getArgsForToolChain(TC, OffloadAction->getOffloadingArch(),
   OffloadAction->getOffloadingDeviceKind());
 StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(Input));
-StringRef Arch = (OffloadAction->getOffloadingArch())
+StringRef Arch = OffloadAction->getOffloadingArch()
  ? OffloadAction->getOffloadingArch()
  : TCArgs.getLastArgValue(options::OPT_march_EQ);
 StringRef Kind =
@@ -8462,14 +8462,22 @@
 llvm::copy_if(Features, std::back_inserter(FeatureArgs),
   [](StringRef Arg) { return !Arg.startswith("-target"); });
 
+if (TC->getTriple().isAMDGPU()) {
+  for (StringRef Feature : llvm::split(Arch.split(':').second, ':')) {
+FeatureArgs.emplace_back(
+Args.MakeArgString(Feature.take_back() + Feature.drop_back()));
+  }
+}
+
 SmallVector Parts{
 "file=" + File.str(),
 "triple=" + TC->getTripleString(),
-"arch=" + Arch.str(),
+"arch=" + getProcessorFromTargetID(TC->getTriple(), Arch).str(),
 "kind=" + Kind.str(),
 };
 
-if (TC->getDriver().isUsingLTO(/* IsOffload */ true))
+if (TC->getDriver().isUsingLTO(/* IsOffload */ true) ||
+TC->getTriple().isAMDGPU())
   for (StringRef Feature : FeatureArgs)
 Parts.emplace_back("feature=" + Feature.str());
 


Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -62,3 +62,7 @@
 // RUN:   --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s  2>&1 | \
 // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB
 // CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+
+// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
+// RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
+// CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a,kind=openmp,feature=-sramecc,feature=+xnack
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8449,7 +8449,7 @@
 C.getArgsForToolChain(TC, OffloadAction->getOffloadingArch(),
   OffloadAction->getOffloadingDeviceKind());
 StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(Input));
-StringRef Arch = (OffloadAct

[PATCH] D150930: [Driver] Accept and ignore -fno-lifetime-dse argument

2023-05-19 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D150930#4355240 , @MaskRay wrote:

> Note: for options controlling individual optimization behaviors, there is a 
> large probability that they may not make sense for Clang since the two 
> compilers' internals are so different. 
> Users and projects should learn to not add GCC optimization options for Clang 
> uses.

If this is the case, can we at least add a CMake option to disable LLVM from 
enabling this option against the user's will? As it stands 
https://reviews.llvm.org/rG47f5c54f997a59bb2c65abe6b8b811f6e7553456 represents 
a significant regression in usability. As I understand, the previous patch only 
had an issue with LTO builds, so it should be perfectly reasonable for users to 
at least disable this at the LLVM level if they do not wish the LLVM build to 
insert an incompatible flag into their compilation database.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150930/new/

https://reviews.llvm.org/D150930

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150807: [Clang] Remove direct linking of offloading runtimes from the arch tools

2023-05-17 Thread Joseph Huber via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG66da9ec073ff: [Clang] Remove direct linking of offloading 
runtimes from the arch tools (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150807/new/

https://reviews.llvm.org/D150807

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt
  clang/tools/nvptx-arch/CMakeLists.txt
  clang/tools/nvptx-arch/NVPTXArch.cpp


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -30,7 +30,6 @@
 // and -help) will be hidden.
 static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
 
-#if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
   CUDA_ERROR_NO_DEVICE = 100,
@@ -75,12 +74,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#include "cuda.h"
-llvm::Error loadCUDA() { return llvm::Error::success(); }
-
-#endif
 
 static int handleError(CUresult Err) {
   const char *ErrStr = nullptr;
Index: clang/tools/nvptx-arch/CMakeLists.txt
===
--- clang/tools/nvptx-arch/CMakeLists.txt
+++ clang/tools/nvptx-arch/CMakeLists.txt
@@ -9,12 +9,4 @@
 set(LLVM_LINK_COMPONENTS Support)
 add_clang_tool(nvptx-arch NVPTXArch.cpp)
 
-find_package(CUDAToolkit QUIET)
-
-# If we found the CUDA library directly we just dynamically link against it.
-if(CUDAToolkit_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 
4))
-  target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver clangBasic)
-else()
-  target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
-  target_link_libraries(nvptx-arch PRIVATE clangBasic)
-endif()
+target_link_libraries(nvptx-arch PRIVATE clangBasic)
Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- clang/tools/amdgpu-arch/CMakeLists.txt
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -10,12 +10,4 @@
 
 add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
 
-# If we find the HSA runtime we link with it directly.
-find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS 
/opt/rocm)
-if(hsa-runtime64_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P 
EQUAL 4))
-  set_target_properties(amdgpu-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
-  target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64 
clangBasic)
-else()
-  target_compile_definitions(amdgpu-arch PRIVATE "DYNAMIC_HSA")
-  target_link_libraries(amdgpu-arch PRIVATE clangBasic)
-endif()
+target_link_libraries(amdgpu-arch PRIVATE clangBasic)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -30,7 +30,6 @@
   OS << clang::getClangToolFullVersion("amdgpu-arch") << '\n';
 }
 
-#if DYNAMIC_HSA
 typedef enum {
   HSA_STATUS_SUCCESS = 0x0,
 } hsa_status_t;
@@ -80,18 +79,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#if defined(__has_include)
-#if __has_include("hsa/hsa.h")
-#include "hsa/hsa.h"
-#elif __has_include("hsa.h")
-#include "hsa.h"
-#endif
-#endif
-
-llvm::Error loadHSA() { return llvm::Error::success(); }
-#endif
 
 static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
   hsa_device_type_t DeviceType;


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -30,7 +30,6 @@
 // and -help) will be hidden.
 static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
 
-#if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
   CUDA_ERROR_NO_DEVICE = 100,
@@ -75,12 +74,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#include "cuda.h"
-llvm::Error loadCUDA() { return llvm::Error::success(); }
-
-#endif
 
 static int handleError(CUresult Err) {
   const char *ErrStr = nullptr;
Index: clang/tools/nvptx-arch/CMakeLists.txt
===
--- clang/tools/nvptx-arch/CMakeLists.txt
+++ clang/tools/nvptx-arch/CMakeLists.txt
@@ -9,12 +9,4 @@
 set(LLVM_LINK_COMPONENTS Support)
 add_clang_tool(nvptx-arch NVPTXArch.cpp)
 
-find_package(CUDAToolkit QUIET)
-
-# If we found the CUDA library directly we just dynamically link against it.
-if(CUDAToolkit_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 4))
-  target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver clangBasic)
-else()
-  target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
-  target_link_libraries(nvptx-arch PRIVATE clangBasic)
-endif()
+target_

[PATCH] D150807: [Clang] Remove direct linking of offloading runtimes from the arch tools

2023-05-17 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 523125.
jhuber6 added a comment.

Remove now unused definitions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150807/new/

https://reviews.llvm.org/D150807

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt
  clang/tools/nvptx-arch/CMakeLists.txt
  clang/tools/nvptx-arch/NVPTXArch.cpp


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -30,7 +30,6 @@
 // and -help) will be hidden.
 static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
 
-#if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
   CUDA_ERROR_NO_DEVICE = 100,
@@ -75,12 +74,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#include "cuda.h"
-llvm::Error loadCUDA() { return llvm::Error::success(); }
-
-#endif
 
 static int handleError(CUresult Err) {
   const char *ErrStr = nullptr;
Index: clang/tools/nvptx-arch/CMakeLists.txt
===
--- clang/tools/nvptx-arch/CMakeLists.txt
+++ clang/tools/nvptx-arch/CMakeLists.txt
@@ -9,12 +9,4 @@
 set(LLVM_LINK_COMPONENTS Support)
 add_clang_tool(nvptx-arch NVPTXArch.cpp)
 
-find_package(CUDAToolkit QUIET)
-
-# If we found the CUDA library directly we just dynamically link against it.
-if(CUDAToolkit_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 
4))
-  target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver clangBasic)
-else()
-  target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
-  target_link_libraries(nvptx-arch PRIVATE clangBasic)
-endif()
+target_link_libraries(nvptx-arch PRIVATE clangBasic)
Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- clang/tools/amdgpu-arch/CMakeLists.txt
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -10,12 +10,4 @@
 
 add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
 
-# If we find the HSA runtime we link with it directly.
-find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS 
/opt/rocm)
-if(hsa-runtime64_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P 
EQUAL 4))
-  set_target_properties(amdgpu-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
-  target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64 
clangBasic)
-else()
-  target_compile_definitions(amdgpu-arch PRIVATE "DYNAMIC_HSA")
-  target_link_libraries(amdgpu-arch PRIVATE clangBasic)
-endif()
+target_link_libraries(amdgpu-arch PRIVATE clangBasic)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -30,7 +30,6 @@
   OS << clang::getClangToolFullVersion("amdgpu-arch") << '\n';
 }
 
-#if DYNAMIC_HSA
 typedef enum {
   HSA_STATUS_SUCCESS = 0x0,
 } hsa_status_t;
@@ -80,18 +79,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#if defined(__has_include)
-#if __has_include("hsa/hsa.h")
-#include "hsa/hsa.h"
-#elif __has_include("hsa.h")
-#include "hsa.h"
-#endif
-#endif
-
-llvm::Error loadHSA() { return llvm::Error::success(); }
-#endif
 
 static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
   hsa_device_type_t DeviceType;


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -30,7 +30,6 @@
 // and -help) will be hidden.
 static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
 
-#if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
   CUDA_ERROR_NO_DEVICE = 100,
@@ -75,12 +74,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#include "cuda.h"
-llvm::Error loadCUDA() { return llvm::Error::success(); }
-
-#endif
 
 static int handleError(CUresult Err) {
   const char *ErrStr = nullptr;
Index: clang/tools/nvptx-arch/CMakeLists.txt
===
--- clang/tools/nvptx-arch/CMakeLists.txt
+++ clang/tools/nvptx-arch/CMakeLists.txt
@@ -9,12 +9,4 @@
 set(LLVM_LINK_COMPONENTS Support)
 add_clang_tool(nvptx-arch NVPTXArch.cpp)
 
-find_package(CUDAToolkit QUIET)
-
-# If we found the CUDA library directly we just dynamically link against it.
-if(CUDAToolkit_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 4))
-  target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver clangBasic)
-else()
-  target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
-  target_link_libraries(nvptx-arch PRIVATE clangBasic)
-endif()
+target_link_libraries(nvptx-arch PRIVATE clangBasic)
Index: clang/tools/amdgpu-arch/CMakeLists.txt
=

[PATCH] D150807: [Clang] Remove direct linking of offloading runtimes from the arch tools

2023-05-17 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: tra, yaxunl, JonChesterfield, jdoerfert, 
tianshilei1992, ye-luo.
Herald added subscribers: mattd, gchakrabarti, asavonic, kerbowa, tpr, jvesely.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jholewinski.
Herald added a project: clang.

The tools `amdgpu-arch` and `nvptx-arch` are used to query the supported
GPUs on a system to implement features like `--offload-arch=native` as
well as generally being useful for setting up tests. However, we
currently directly link these if they are available. This patch removes
this because it causes many problems when the user does not have the
libraries present or has them misconfigured at build time. Since these are
built unconditionally we should keep the dependencies away from clang.

Fixes https://github.com/llvm/llvm-project/issues/62784


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D150807

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt
  clang/tools/nvptx-arch/CMakeLists.txt
  clang/tools/nvptx-arch/NVPTXArch.cpp


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -30,7 +30,6 @@
 // and -help) will be hidden.
 static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
 
-#if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
   CUDA_ERROR_NO_DEVICE = 100,
@@ -75,12 +74,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#include "cuda.h"
-llvm::Error loadCUDA() { return llvm::Error::success(); }
-
-#endif
 
 static int handleError(CUresult Err) {
   const char *ErrStr = nullptr;
Index: clang/tools/nvptx-arch/CMakeLists.txt
===
--- clang/tools/nvptx-arch/CMakeLists.txt
+++ clang/tools/nvptx-arch/CMakeLists.txt
@@ -9,12 +9,5 @@
 set(LLVM_LINK_COMPONENTS Support)
 add_clang_tool(nvptx-arch NVPTXArch.cpp)
 
-find_package(CUDAToolkit QUIET)
-
-# If we found the CUDA library directly we just dynamically link against it.
-if(CUDAToolkit_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 
4))
-  target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver clangBasic)
-else()
-  target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
-  target_link_libraries(nvptx-arch PRIVATE clangBasic)
-endif()
+target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
+target_link_libraries(nvptx-arch PRIVATE clangBasic)
Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- clang/tools/amdgpu-arch/CMakeLists.txt
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -10,12 +10,5 @@
 
 add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
 
-# If we find the HSA runtime we link with it directly.
-find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS 
/opt/rocm)
-if(hsa-runtime64_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P 
EQUAL 4))
-  set_target_properties(amdgpu-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
-  target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64 
clangBasic)
-else()
-  target_compile_definitions(amdgpu-arch PRIVATE "DYNAMIC_HSA")
-  target_link_libraries(amdgpu-arch PRIVATE clangBasic)
-endif()
+target_compile_definitions(amdgpu-arch PRIVATE "DYNAMIC_HSA")
+target_link_libraries(amdgpu-arch PRIVATE clangBasic)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -30,7 +30,6 @@
   OS << clang::getClangToolFullVersion("amdgpu-arch") << '\n';
 }
 
-#if DYNAMIC_HSA
 typedef enum {
   HSA_STATUS_SUCCESS = 0x0,
 } hsa_status_t;
@@ -80,18 +79,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#if defined(__has_include)
-#if __has_include("hsa/hsa.h")
-#include "hsa/hsa.h"
-#elif __has_include("hsa.h")
-#include "hsa.h"
-#endif
-#endif
-
-llvm::Error loadHSA() { return llvm::Error::success(); }
-#endif
 
 static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
   hsa_device_type_t DeviceType;


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -30,7 +30,6 @@
 // and -help) will be hidden.
 static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
 
-#if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
   CUDA_ERROR_NO_DEVICE = 100,
@@ -75,12 +74,6 @@
 #undef DYNAMIC_INIT
   return llvm::Error::success();
 }
-#else
-
-#include "cuda.h"
-llvm::Error loadCUDA() { return llvm::Error::success(); }
-
-#endif
 
 static int handleError(CUresult Err) {
   const char *ErrStr = nullptr;
Index: clang/

[PATCH] D150461: [OpenMP] Naturally align internal global variables in the OpenMPIRBuilder

2023-05-12 Thread Joseph Huber via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGdd02984519ab: [OpenMP] Naturally align internal global 
variables in the OpenMPIRBuilder (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150461/new/

https://reviews.llvm.org/D150461

Files:
  clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/for_reduction_task_codegen.cpp
  clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
  clang/test/OpenMP/parallel_reduction_task_codegen.cpp
  clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
  clang/test/OpenMP/reduction_implicit_map.cpp
  clang/test/OpenMP/sections_reduction_task_codegen.cpp
  clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
  
clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/taskloop_reduction_codegen.cpp
  clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4225,10 +4225,12 @@
 // variable for possibly changing that to internal or private, or maybe
 // create different versions of the function for different OMP internal
 // variables.
-Elem.second = new GlobalVariable(
+auto *GV = new GlobalVariable(
 M, Ty, /*IsConstant=*/false, GlobalValue::CommonLinkage,
 Constant::getNullValue(Ty), Elem.first(),
 /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AddressSpace);
+GV->setAlignment(M.getDataLayout().getABITypeAlign(Ty));
+Elem.second = GV;
   }
 
   return cast(&*Elem.second);
Index: clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
===
--- clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -348,7 +348,7 @@
 // CHECK1-NEXT:store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // CHECK1-NEXT:[[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT:[[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT:[[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:[[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
 // CHECK1-NEXT:[[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]]
 // CHECK1-NEXT:[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]]
@@ -370,7 +370,7 @@
 // CHECK1-NEXT:[[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT:[[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT:[[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:[[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
 // CHECK1-NEXT:[[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:[[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
@@ -712,7 +712,7 @@
 // CHECK1-NEXT:store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // CHECK1-NEXT:[[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT:[[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT:[[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:[[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
 // CHECK1-NEXT:[[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]]
 // CHECK1-NEXT:[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]]
@@ -734,7 +734,7 @@
 // CHECK1-NEXT:[[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT:[[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT:[[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:[[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
 // CHECK1-NEXT:[[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:[[TMP5:%.*]] = load ptr, p

[PATCH] D150461: [OpenMP] Naturally align internal global variables in the OpenMPIRBuilder

2023-05-12 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D150461#4338498, @gchatelet wrote:

> quick question, did you try to build the openmp runtime as well to check if 
> the tests still pass?

I'm an OpenMP developer so I always have it built : ). Yes they still pass.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150461/new/

https://reviews.llvm.org/D150461

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150427: [AMDGPU] Non hostcall printf support for HIP

2023-05-12 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

Where does the runtime implementation of this live? I'm not very familiar with 
the HIP / hostcall ecosystem.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150427/new/

https://reviews.llvm.org/D150427

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150461: [OpenMP] Naturally align internal global variables in the OpenMPIRBuilder

2023-05-12 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, tianshilei1992, ABataev, JonChesterfield, 
tstellar, gchatelet.
Herald added subscribers: sunshaoce, guansong, hiraditya, yaxunl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, jplehr, sstefan1.
Herald added projects: clang, LLVM.

We use this helper to make several internal global variables during
codegen. Currently we do not specify any alignment, which allows the
alignment to be set incorrectly after some changes in how alignment was
handled. This patch explicitly aligns these variables to the natural
alignment as specified by the data layout.

Fixes https://github.com/llvm/llvm-project/issues/62668


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D150461

Files:
  clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/for_reduction_task_codegen.cpp
  clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
  clang/test/OpenMP/parallel_reduction_task_codegen.cpp
  clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
  clang/test/OpenMP/reduction_implicit_map.cpp
  clang/test/OpenMP/sections_reduction_task_codegen.cpp
  clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
  
clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/taskloop_reduction_codegen.cpp
  clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4225,10 +4225,12 @@
 // variable for possibly changing that to internal or private, or maybe
 // create different versions of the function for different OMP internal
 // variables.
-Elem.second = new GlobalVariable(
+auto *GV = new GlobalVariable(
 M, Ty, /*IsConstant=*/false, GlobalValue::CommonLinkage,
 Constant::getNullValue(Ty), Elem.first(),
 /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AddressSpace);
+GV->setAlignment(M.getDataLayout().getABITypeAlign(Ty));
+Elem.second = GV;
   }
 
   return cast(&*Elem.second);
Index: clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
===
--- clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -348,7 +348,7 @@
 // CHECK1-NEXT:store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // CHECK1-NEXT:[[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT:[[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT:[[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:[[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
 // CHECK1-NEXT:[[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]]
 // CHECK1-NEXT:[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]]
@@ -370,7 +370,7 @@
 // CHECK1-NEXT:[[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT:[[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT:[[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:[[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
 // CHECK1-NEXT:[[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:[[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
@@ -712,7 +712,7 @@
 // CHECK1-NEXT:store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // CHECK1-NEXT:[[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT:[[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT:[[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:[[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
 // CHECK1-NEXT:[[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]]
 // CHECK1-NEXT:[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]]
@@ -734,7 +734,7 @@
 // CHECK1-NEXT:[[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // CHECK1-NEXT:store ptr [[TMP1]], ptr [[DOTAD

[PATCH] D150013: [Clang] Respect `-L` options when compiling directly for AMDGPU

2023-05-11 Thread Joseph Huber via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG027aeec7da67: [Clang] Respect `-L` options when compiling 
directly for AMDGPU (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150013/new/

https://reviews.llvm.org/D150013

Files:
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck 
-check-prefix=LTO %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
+// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -232,9 +232,11 @@
   Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
 
   // LIBRARY_PATH are included before user inputs and only supported on native
-  // toolchains.
+  // toolchains. Otherwise only add the '-L' arguments requested by the user.
   if (!TC.isCrossCompiling())
 addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+  else
+Args.AddAllArgs(CmdArgs, options::OPT_L);
 
   for (const auto &II : Inputs) {
 // If the current tool chain refers to an OpenMP offloading host, we


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
+// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -232,9 +232,11 @@
   Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
 
   // LIBRARY_PATH are included before user inputs and only supported on native
-  // toolchains.
+  // toolchains. Otherwise only add the '-L' arguments requested by the user.
   if (!TC.isCrossCompiling())
 addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+  else
+Args.AddAllArgs(CmdArgs, options::OPT_L);
 
   for (const auto &II : Inputs) {
 // If the current tool chain refers to an OpenMP offloading host, we
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150013: [Clang] Respect `-L` options when compiling directly for AMDGPU

2023-05-10 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 updated this revision to Diff 521202.
jhuber6 added a comment.

Updating, @yaxunl does this look good?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150013/new/

https://reviews.llvm.org/D150013

Files:
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck 
-check-prefix=LTO %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
+// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -232,9 +232,11 @@
   Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
 
   // LIBRARY_PATH are included before user inputs and only supported on native
-  // toolchains.
+  // toolchains. Otherwise only add the '-L' arguments requested by the user.
   if (!TC.isCrossCompiling())
 addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+  else
+Args.AddAllArgs(CmdArgs, options::OPT_L);
 
   for (const auto &II : Inputs) {
 // If the current tool chain refers to an OpenMP offloading host, we


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
+// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -232,9 +232,11 @@
   Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
 
   // LIBRARY_PATH are included before user inputs and only supported on native
-  // toolchains.
+  // toolchains. Otherwise only add the '-L' arguments requested by the user.
   if (!TC.isCrossCompiling())
 addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+  else
+Args.AddAllArgs(CmdArgs, options::OPT_L);
 
   for (const auto &II : Inputs) {
 // If the current tool chain refers to an OpenMP offloading host, we
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150013: [Clang] Respect `-L` options when compiling directly for AMDGPU

2023-05-09 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added a comment.

In D150013#4330452, @MaskRay wrote:

> So it seems that there are configurations that we need -L (cross 
> compilation?). If we forward -L in some configurations, I think it'd be 
> better to do this consistently.
>
> The `LIBRARY_PATH` options seems not useful and the conditional `if (not 
> cross compiling) add LIBRARY_PATH` is more unfortunate. I wish that we don't 
> add more cases that we do something with `LIBRARY_PATH`.
> (IMO the `LIBRARY_PATH`  users can just fix their build system or use 
> `CCC_OVERRIDE_OPTIONS`)
>
> We definitely want to avoid more uses of `TC.isCrossCompiling()`.

The comment itself `// LIBRARY_PATH are included before user inputs and only 
supported on native toolchains.` suggests that something like this might be 
valid.

  // LIBRARY_PATH are included before user inputs and only supported on native
  // toolchains.
  if (!TC.isCrossCompiling())
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
  else 
Args.AddAllArgs(CmdArgs, options::OPT_L);



  




Comment at: clang/lib/Driver/ToolChains/AMDGPU.cpp:546
   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
+  Args.AddAllArgs(CmdArgs, options::OPT_L);

jhuber6 wrote:
> MaskRay wrote:
> > jhuber6 wrote:
> > > yaxunl wrote:
> > > > jhuber6 wrote:
> > > > > yaxunl wrote:
> > > > > > AddLinkerInputs has code doing that, and it handles env var 
> > > > > > LIBRARY_PATH. However that code is disabled for AMDGPU because 
> > > > > > AMDGPU returns true for isCrossCompiling.
> > > > > > 
> > > > > > https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/CommonArgs.cpp#L236
> > > > > > 
> > > > > > It seems isCrossCompiling is solely for controlling whether to 
> > > > > > consume `-L`. If we want amdgpu toolchain to accept `-L`, we can 
> > > > > > simply let isCrossCompiling return false.
> > > > > Good catch, we could maybe set `isCrossCompiling` to false if 
> > > > > targeted directly by the user, e.g. `--target=amdgcn-amd-amdhsa` vs 
> > > > > `--offload-arch`.
> > > > That would be better. Thanks.
> > > It still is technically cross compiling, since we are building for a 
> > > target that does not match the system's architecture. The original code 
> > > that prevents passing `-L` was contributed by @MaskRay. I understand that 
> > > we may not want to pass `LIBRARY_PATH` defines, but what's the rationale 
> > > for not passing any `-L` options manually specified by the user?
> > The `LIBRARY_PATH` code had been there when I moved it in 2019. It'd be 
> > best not to rely on `LIBRARY_PATH`. I think that forwarding `-L` seems 
> > reasonable but I am not familiar with the amdgpu ecosystem..
> The linker is just `lld` so it should be the same conceptually. I'm just 
> figuring that even if the user is cross compiling we should respect `-L` 
> passed on the command line. Should I change this patch to make that change?
For offloading I think the problem might be that we don't want to forward `-L` 
arguments from the host. In that case, we might be able to apply `


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150013/new/

https://reviews.llvm.org/D150013

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150013: [Clang] Respect `-L` options when compiling directly for AMDGPU

2023-05-09 Thread Joseph Huber via Phabricator via cfe-commits

jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/AMDGPU.cpp:546
   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
+  Args.AddAllArgs(CmdArgs, options::OPT_L);

MaskRay wrote:
> jhuber6 wrote:
> > yaxunl wrote:
> > > jhuber6 wrote:
> > > > yaxunl wrote:
> > > > > AddLinkerInputs has code doing that, and it handles env var 
> > > > > LIBRARY_PATH. However that code is disabled for AMDGPU because AMDGPU 
> > > > > returns true for isCrossCompiling.
> > > > > 
> > > > > https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/CommonArgs.cpp#L236
> > > > > 
> > > > > It seems isCrossCompiling is solely for controlling whether to 
> > > > > consume `-L`. If we want amdgpu toolchain to accept `-L`, we can 
> > > > > simply let isCrossCompiling return false.
> > > > Good catch, we could maybe set `isCrossCompiling` to false if targeted 
> > > > directly by the user, e.g. `--target=amdgcn-amd-amdhsa` vs 
> > > > `--offload-arch`.
> > > That would be better. Thanks.
> > It still is technically cross compiling, since we are building for a target 
> > that does not match the system's architecture. The original code that 
> > prevents passing `-L` was contributed by @MaskRay. I understand that we may 
> > not want to pass `LIBRARY_PATH` defines, but what's the rationale for not 
> > passing any `-L` options manually specified by the user?
> The `LIBRARY_PATH` code had been there when I moved it in 2019. It'd be best 
> not to rely on `LIBRARY_PATH`. I think that forwarding `-L` seems reasonable 
> but I am not familiar with the amdgpu ecosystem..
The linker is just `lld` so it should be the same conceptually. I'm just 
figuring that even if the user is cross compiling we should respect `-L` passed 
on the command line. Should I change this patch to make that change?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150013/new/

https://reviews.llvm.org/D150013

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D150156: [OpenMP] Fix incorrect interop type for number of dependencies

2023-05-08 Thread Joseph Huber via Phabricator via cfe-commits

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe494ebf9d09b: [OpenMP] Fix incorrect interop type for number 
of dependencies (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150156/new/

https://reviews.llvm.org/D150156

Files:
  clang/test/OpenMP/interop_irbuilder.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  llvm/test/Transforms/OpenMP/add_attributes.ll
  openmp/libomptarget/src/interop.cpp

Index: openmp/libomptarget/src/interop.cpp
===
--- openmp/libomptarget/src/interop.cpp
+++ openmp/libomptarget/src/interop.cpp
@@ -184,7 +184,7 @@
 void __tgt_interop_init(ident_t *LocRef, kmp_int32 Gtid,
 omp_interop_val_t *&InteropPtr,
 kmp_interop_type_t InteropType, kmp_int32 DeviceId,
-kmp_int64 Ndeps, kmp_depend_info_t *DepList,
+kmp_int32 Ndeps, kmp_depend_info_t *DepList,
 kmp_int32 HaveNowait) {
   kmp_int32 NdepsNoalias = 0;
   kmp_depend_info_t *NoaliasDepList = NULL;
Index: llvm/test/Transforms/OpenMP/add_attributes.ll
===
--- llvm/test/Transforms/OpenMP/add_attributes.ll
+++ llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -742,7 +742,7 @@
 
 declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32);
 
-declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i64, ptr, i32);
+declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i32, ptr, i32);
 
 declare void @__tgt_interop_use(ptr, i32, ptr, i32, i32, ptr, i32);
 
@@ -1398,7 +1398,7 @@
 ; CHECK: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i64, ptr, i32)
+; CHECK: declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i32, ptr, i32)
 
 ; CHECK-NOT: Function Attrs
 ; CHECK: declare void @__tgt_interop_use(ptr, i32, ptr, i32, i32, ptr, i32)
@@ -2046,7 +2046,7 @@
 ; OPTIMISTIC: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i64, ptr, i32)
+; OPTIMISTIC: declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i32, ptr, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare void @__tgt_interop_use(ptr, i32, ptr, i32, i32, ptr, i32)
@@ -2707,7 +2707,7 @@
 ; EXT: declare void @__tgt_interop_destroy(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext)
 
 ; EXT-NOT: Function Attrs
-; EXT: declare void @__tgt_interop_init(ptr, i32 signext, ptr, i32 signext, i32 signext, i64, ptr, i32 signext)
+; EXT: declare void @__tgt_interop_init(ptr, i32 signext, ptr, i32 signext, i32 signext, i32, ptr, i32 signext)
 
 ; EXT-NOT: Function Attrs
 ; EXT: declare void @__tgt_interop_use(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext)
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3798,7 +3798,7 @@
 Device = ConstantInt::get(Int32, -1);
   Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
   if (NumDependences == nullptr) {
-NumDependences = ConstantInt::get(Int64, 0);
+NumDependences = ConstantInt::get(Int32, 0);
 PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
 DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
   }
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -402,7 +402,7 @@
 __OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr)
 
 __OMP_RTL(__tgt_interop_init, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
-  Int32, Int64, VoidPtr, Int32)
+  Int32, Int32, VoidPtr, Int32)
 __OMP_RTL(__tgt_interop_destroy, false, Void, IdentPtr, Int32, VoidPtrPtr,
   Int32, Int32, VoidPtr, Int32)
 __OMP_RTL(__tgt_interop_use, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
Index: clang/test/OpenMP/interop_irbuilder.cpp
===
--- clang/test/OpenMP/interop_irbuilder.cpp
+++ clang/test/OpenMP/interop_irbuilder.cpp
@@ -10,23 +10,17 @@
   int D0, D1;
   omp_interop_t interop;
 
-#pragma omp interop init(target \
- : interop)
+#pragma omp interop init(target : interop)
 
-#pragma omp interop init(targetsync \
- : interop)
+#pragma omp interop init(targetsync :

< 1 2 3 4 5 6 7 8 9 10 >

101 - 200 of 1342 matches

Mail list logo