[PATCH] D150156: [OpenMP] Fix incorrect interop type for number of dependencies

2023-05-08 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 520543.
jhuber6 added a comment.

Fix `add_attributes.ll`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150156/new/

https://reviews.llvm.org/D150156

Files:
  clang/test/OpenMP/interop_irbuilder.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  llvm/test/Transforms/OpenMP/add_attributes.ll
  openmp/libomptarget/src/interop.cpp

Index: openmp/libomptarget/src/interop.cpp
===
--- openmp/libomptarget/src/interop.cpp
+++ openmp/libomptarget/src/interop.cpp
@@ -184,7 +184,7 @@
 void __tgt_interop_init(ident_t *LocRef, kmp_int32 Gtid,
 omp_interop_val_t *&InteropPtr,
 kmp_interop_type_t InteropType, kmp_int32 DeviceId,
-kmp_int64 Ndeps, kmp_depend_info_t *DepList,
+kmp_int32 Ndeps, kmp_depend_info_t *DepList,
 kmp_int32 HaveNowait) {
   kmp_int32 NdepsNoalias = 0;
   kmp_depend_info_t *NoaliasDepList = NULL;
Index: llvm/test/Transforms/OpenMP/add_attributes.ll
===
--- llvm/test/Transforms/OpenMP/add_attributes.ll
+++ llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -742,7 +742,7 @@
 
 declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32);
 
-declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i64, ptr, i32);
+declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i32, ptr, i32);
 
 declare void @__tgt_interop_use(ptr, i32, ptr, i32, i32, ptr, i32);
 
@@ -1398,7 +1398,7 @@
 ; CHECK: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i64, ptr, i32)
+; CHECK: declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i32, ptr, i32)
 
 ; CHECK-NOT: Function Attrs
 ; CHECK: declare void @__tgt_interop_use(ptr, i32, ptr, i32, i32, ptr, i32)
@@ -2046,7 +2046,7 @@
 ; OPTIMISTIC: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i64, ptr, i32)
+; OPTIMISTIC: declare void @__tgt_interop_init(ptr, i32, ptr, i32, i32, i32, ptr, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare void @__tgt_interop_use(ptr, i32, ptr, i32, i32, ptr, i32)
@@ -2707,7 +2707,7 @@
 ; EXT: declare void @__tgt_interop_destroy(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext)
 
 ; EXT-NOT: Function Attrs
-; EXT: declare void @__tgt_interop_init(ptr, i32 signext, ptr, i32 signext, i32 signext, i64, ptr, i32 signext)
+; EXT: declare void @__tgt_interop_init(ptr, i32 signext, ptr, i32 signext, i32 signext, i32, ptr, i32 signext)
 
 ; EXT-NOT: Function Attrs
 ; EXT: declare void @__tgt_interop_use(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext)
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3798,7 +3798,7 @@
 Device = ConstantInt::get(Int32, -1);
   Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
   if (NumDependences == nullptr) {
-NumDependences = ConstantInt::get(Int64, 0);
+NumDependences = ConstantInt::get(Int32, 0);
 PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
 DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
   }
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -402,7 +402,7 @@
 __OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr)
 
 __OMP_RTL(__tgt_interop_init, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
-  Int32, Int64, VoidPtr, Int32)
+  Int32, Int32, VoidPtr, Int32)
 __OMP_RTL(__tgt_interop_destroy, false, Void, IdentPtr, Int32, VoidPtrPtr,
   Int32, Int32, VoidPtr, Int32)
 __OMP_RTL(__tgt_interop_use, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
Index: clang/test/OpenMP/interop_irbuilder.cpp
===
--- clang/test/OpenMP/interop_irbuilder.cpp
+++ clang/test/OpenMP/interop_irbuilder.cpp
@@ -10,23 +10,17 @@
   int D0, D1;
   omp_interop_t interop;
 
-#pragma omp interop init(target \
- : interop)
+#pragma omp interop init(target : interop)
 
-#pragma omp interop init(targetsync \
- : interop)
+#pragma omp interop init(targetsync : interop)
 
-#pragma omp interop init(target \
- : interop) device(device_id)
+#pragma omp interop init(target : interop) device(

[PATCH] D150156: [OpenMP] Fix incorrect interop type for number of dependencies

2023-05-08 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D150156#4328360, @tianshilei1992 wrote:

> Does this cause the IR issue?

Not sure, I just get an undefined symbol error in the linker now. Not sure if 
that means it's resolved or I just can't reproduce it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150156/new/

https://reviews.llvm.org/D150156

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150156: [OpenMP] Fix incorrect interop type for number of dependencies

2023-05-08 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 520526.
jhuber6 added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Fix test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150156/new/

https://reviews.llvm.org/D150156

Files:
  clang/test/OpenMP/interop_irbuilder.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  openmp/libomptarget/src/interop.cpp

Index: openmp/libomptarget/src/interop.cpp
===
--- openmp/libomptarget/src/interop.cpp
+++ openmp/libomptarget/src/interop.cpp
@@ -184,7 +184,7 @@
 void __tgt_interop_init(ident_t *LocRef, kmp_int32 Gtid,
 omp_interop_val_t *&InteropPtr,
 kmp_interop_type_t InteropType, kmp_int32 DeviceId,
-kmp_int64 Ndeps, kmp_depend_info_t *DepList,
+kmp_int32 Ndeps, kmp_depend_info_t *DepList,
 kmp_int32 HaveNowait) {
   kmp_int32 NdepsNoalias = 0;
   kmp_depend_info_t *NoaliasDepList = NULL;
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3798,7 +3798,7 @@
 Device = ConstantInt::get(Int32, -1);
   Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
   if (NumDependences == nullptr) {
-NumDependences = ConstantInt::get(Int64, 0);
+NumDependences = ConstantInt::get(Int32, 0);
 PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
 DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
   }
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -402,7 +402,7 @@
 __OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr)
 
 __OMP_RTL(__tgt_interop_init, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
-  Int32, Int64, VoidPtr, Int32)
+  Int32, Int32, VoidPtr, Int32)
 __OMP_RTL(__tgt_interop_destroy, false, Void, IdentPtr, Int32, VoidPtrPtr,
   Int32, Int32, VoidPtr, Int32)
 __OMP_RTL(__tgt_interop_use, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
Index: clang/test/OpenMP/interop_irbuilder.cpp
===
--- clang/test/OpenMP/interop_irbuilder.cpp
+++ clang/test/OpenMP/interop_irbuilder.cpp
@@ -10,23 +10,17 @@
   int D0, D1;
   omp_interop_t interop;
 
-#pragma omp interop init(target \
- : interop)
+#pragma omp interop init(target : interop)
 
-#pragma omp interop init(targetsync \
- : interop)
+#pragma omp interop init(targetsync : interop)
 
-#pragma omp interop init(target \
- : interop) device(device_id)
+#pragma omp interop init(target : interop) device(device_id)
 
-#pragma omp interop init(targetsync \
- : interop) device(device_id)
+#pragma omp interop init(targetsync : interop) device(device_id)
 
-#pragma omp interop use(interop) depend(in \
-: D0, D1) nowait
+#pragma omp interop use(interop) depend(in : D0, D1) nowait
 
-#pragma omp interop destroy(interop) depend(in \
-: D0, D1)
+#pragma omp interop destroy(interop) depend(in : D0, D1)
 }
 
 struct S {
@@ -39,23 +33,17 @@
   int device_id = 4;
   int D0, D1;
 
-#pragma omp interop init(target \
- : interop)
+#pragma omp interop init(target : interop)
 
-#pragma omp interop init(targetsync \
- : interop)
+#pragma omp interop init(targetsync : interop)
 
-#pragma omp interop init(target \
- : interop) device(device_id)
+#pragma omp interop init(target : interop) device(device_id)
 
-#pragma omp interop init(targetsync \
- : interop) device(device_id)
+#pragma omp interop init(targetsync : interop) device(device_id)
 
-#pragma omp interop use(interop) depend(in \
-: D0, D1) nowait
+#pragma omp interop use(interop) depend(in : D0, D1) nowait
 
-#pragma omp interop destroy(interop) depend(in \
-: D0, D1)
+#pragma omp interop destroy(interop) depend(in : D0, D1)
 }
 // CHECK-LABEL: @_Z5test1v(
 // CHECK-NEXT:  entry:
@@ -69,15 +57,15 @@
 // CHECK-NEXT:[[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8
 // CHECK-NEXT:store i32 4, ptr [[DEVICE_ID]], align 4
 // CHECK-NEXT:[[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-NEXT:call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], ptr [[INTEROP]], i32 1, i

[PATCH] D150136: [Clang] Change default triple to LLVM_HOST_TRIPLE for the CUDA toolchain

2023-05-08 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc2c917f7f668: [Clang] Change default triple to 
LLVM_HOST_TRIPLE for the CUDA toolchain (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150136/new/

https://reviews.llvm.org/D150136

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -711,8 +711,7 @@
 /// system's default triple if not provided.
 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
-: NVPTXToolChain(D, Triple,
- llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args,
+: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
  /*Freestanding=*/true) {}
 
 llvm::opt::DerivedArgList *


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -711,8 +711,7 @@
 /// system's default triple if not provided.
 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
-: NVPTXToolChain(D, Triple,
- llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args,
+: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
  /*Freestanding=*/true) {}
 
 llvm::opt::DerivedArgList *
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150136: [Clang] Change default triple to LLVM_HOST_TRIPLE for the CUDA toolchain

2023-05-08 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D150136#4327570, @tra wrote:

> The change may be an improvement, but we may still have a potential issue 
> here.
>
> E.g. ideally we may want to be able to cross-compile a CUDA app on a powerpc 
> or ARM build host targeting an NVIDIA GPU on an x86 host. So, the compilation 
> tools would need to be found for the powerpc/arm host, but the pair of 
> triples used during compilation would have to be x86 and nvptx.

So, this triple is only used for locating the CUDA library itself. In that case 
it's generally assumed that it will match whatever file structure the host 
computer is using. Specifically, right now all it's used for is 
`HostTriple.isOSWindows()`.

> In this situation the LLVM_HOST_TRIPLE would not be the right triple at all. 
> Does OpenMP currently handle the cross-compilation scenario above?

I don't think anyone's tried OpenMP with cross compilation. Most likely because 
it's only supported on Linux currently. I actually don't know what would happen 
if you tried.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150136/new/

https://reviews.llvm.org/D150136

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150136: [Clang] Change default triple to LLVM_HOST_TRIPLE for the CUDA toolchain

2023-05-08 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: tra, yaxunl.
Herald added a subscriber: mattd.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay.
Herald added a project: clang.

When cross-compiling NVPTX we use the triple to indicate which paths to
search for the CUDA toolchain. Currently this uses the default target
triple. This might not be exactly correct, as this is the default triple
used to compile binaries, not the host system. We want the host triple
because it indicates which folders should hold CUDA.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D150136

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -711,8 +711,7 @@
 /// system's default triple if not provided.
 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
-: NVPTXToolChain(D, Triple,
- llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args,
+: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
  /*Freestanding=*/true) {}
 
 llvm::opt::DerivedArgList *


Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -711,8 +711,7 @@
 /// system's default triple if not provided.
 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
-: NVPTXToolChain(D, Triple,
- llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args,
+: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
  /*Freestanding=*/true) {}
 
 llvm::opt::DerivedArgList *
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150013: [Clang] Respect `-L` options when compiling directly for AMDGPU

2023-05-08 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/AMDGPU.cpp:546
   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
+  Args.AddAllArgs(CmdArgs, options::OPT_L);

yaxunl wrote:
> jhuber6 wrote:
> > yaxunl wrote:
> > > AddLinkerInputs has code doing that, and it handles env var LIBRARY_PATH. 
> > > However that code is disabled for AMDGPU because AMDGPU returns true for 
> > > isCrossCompiling.
> > > 
> > > https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/CommonArgs.cpp#L236
> > > 
> > > It seems isCrossCompiling is solely for controlling whether to consume 
> > > `-L`. If we want amdgpu toolchain to accept `-L`, we can simply let 
> > > isCrossCompiling return false.
> > Good catch, we could maybe set `isCrossCompiling` to false if targeted 
> > directly by the user, e.g. `--target=amdgcn-amd-amdhsa` vs `--offload-arch`.
> That would be better. Thanks.
It still is technically cross compiling, since we are building for a target 
that does not match the system's architecture. The original code that prevents 
passing `-L` was contributed by @MaskRay. I understand that we may not want to 
pass `LIBRARY_PATH` defines, but what's the rationale for not passing any `-L` 
options manually specified by the user?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150013/new/

https://reviews.llvm.org/D150013

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150013: [Clang] Respect `-L` options when compiling directly for AMDGPU

2023-05-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/AMDGPU.cpp:546
   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
+  Args.AddAllArgs(CmdArgs, options::OPT_L);

yaxunl wrote:
> AddLinkerInputs has code doing that, and it handles env var LIBRARY_PATH. 
> However that code is disabled for AMDGPU because AMDGPU returns true for 
> isCrossCompiling.
> 
> https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/CommonArgs.cpp#L236
> 
> It seems isCrossCompiling is solely for controlling whether to consume `-L`. 
> If we want amdgpu toolchain to accept `-L`, we can simply let 
> isCrossCompiling return false.
Good catch, we could maybe set `isCrossCompiling` to false if targeted directly 
by the user, e.g. `--target=amdgcn-amd-amdhsa` vs `--offload-arch`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150013/new/

https://reviews.llvm.org/D150013

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150013: [Clang] Respect `-L` options when compiling directly for AMDGPU

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: tra, yaxunl, JonChesterfield.
Herald added subscribers: kosarev, kerbowa, tpr, dstuttard, jvesely, kzhuravl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay, wdng.
Herald added a project: clang.

The AMDGPU linker is `lld`, which has full support for standard features
like static libraries. Previously the AMDGPU toolchain did not forward
`-L` arguments so we could not tell it where to find certain libraries.
This patch simply forwards it like the other toolchains.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D150013

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
+// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -544,6 +544,7 @@
   ArgStringList CmdArgs;
   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
+  Args.AddAllArgs(CmdArgs, options::OPT_L);
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
+// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -544,6 +544,7 @@
   ArgStringList CmdArgs;
   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
+  Args.AddAllArgs(CmdArgs, options::OPT_L);
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D149978#4323452, @tra wrote:

>> I've discovered that LLVM adds -Wl,-fcolor-diagnostics
>
> Can you tell me where it's done?

`llvm/cmake/modules/HandleLLVMOptions.cmake:994`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149978/new/

https://reviews.llvm.org/D149978

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D149978#4323328, @tra wrote:

>> The latter is a little difficult,
>
> The more we dig, the more we want GPU-capable lld. :-)

My thoughts exactly. I had a small chat with @MaskRay about how difficult it 
would be to spin up support for NVPTX. But it would probably be a reasonably 
large project, and considering who I work for, it would be difficult for me to 
do it as more than a hobby.




Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:641
+  // by nvlink.
+  if (llvm::any_of(II.getInputArg().getValues(), [](StringRef Arg) {
+return Arg.equals("--color-diagnostics");

tra wrote:
> Can there ever be more than one value returned by 
> `II.getInputArg().getValues()`?
> 
> If so, we probably don't want to skip all of them if one of them is 
> `--color-diagnostics`. We may want to ignore only singleton 
> `--color-diagnostics` and let all other combinations error out.
Yeah, you can do `-Wl,arg1,arg2,arg3`. This was just because I couldn't think 
of an easy way to separate them out: since we rely on `renderAsInput`, we'd 
need to create an entirely new arg. That is doable, but I wasn't sure if it 
was worth the effort.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149978/new/

https://reviews.llvm.org/D149978

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 519977.
jhuber6 added a comment.

Putting up the hack that works around my problem with `libc`. Definitely not a 
good solution though.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149978/new/

https://reviews.llvm.org/D149978

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/cuda-cross-compiling.c


Index: clang/test/Driver/cuda-cross-compiling.c
===
--- clang/test/Driver/cuda-cross-compiling.c
+++ clang/test/Driver/cuda-cross-compiling.c
@@ -77,3 +77,12 @@
 // RUN:   | FileCheck -check-prefix=LOWERING %s
 
 // LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
+
+//
+// Test passing arguments directly to nvlink.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -Wl,-v -Wl,--color-diagnostics -### %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=LINKER-ARGS %s
+
+// LINKER-ARGS: nvlink{{.*}}"-v"
+// LINKER-ARGS-NOT: nvlink{{.*}}"--color-diagnostics"
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -607,36 +607,43 @@
   continue;
 }
 
-// Currently, we only pass the input files to the linker, we do not pass
-// any libraries that may be valid only for the host.
-if (!II.isFilename())
-  continue;
-
 // The 'nvlink' application performs RDC-mode linking when given a '.o'
 // file and device linking when given a '.cubin' file. We always want to
 // perform device linking, so just rename any '.o' files.
 // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
-auto InputFile = getToolChain().getInputFilename(II);
-if (llvm::sys::path::extension(InputFile) != ".cubin") {
-  // If there are no actions above this one then this is direct input and we
-  // can copy it. Otherwise the input is internal so a `.cubin` file should
-  // exist.
-  if (II.getAction() && II.getAction()->getInputs().size() == 0) {
-const char *CubinF =
-Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
-llvm::sys::path::stem(InputFile), "cubin"));
-if (std::error_code EC =
-llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
-  continue;
+if (II.isFilename()) {
+  auto InputFile = getToolChain().getInputFilename(II);
+  if (llvm::sys::path::extension(InputFile) != ".cubin") {
+// If there are no actions above this one then this is direct input and
+// we can copy it. Otherwise the input is internal so a `.cubin` file
+// should exist.
+if (II.getAction() && II.getAction()->getInputs().size() == 0) {
+  const char *CubinF =
+  Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
+  llvm::sys::path::stem(InputFile), "cubin"));
+  if (std::error_code EC =
+  llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
+continue;
 
-CmdArgs.push_back(CubinF);
+  CmdArgs.push_back(CubinF);
+} else {
+  SmallString<256> Filename(InputFile);
+  llvm::sys::path::replace_extension(Filename, "cubin");
+  CmdArgs.push_back(Args.MakeArgString(Filename));
+}
   } else {
-SmallString<256> Filename(InputFile);
-llvm::sys::path::replace_extension(Filename, "cubin");
-CmdArgs.push_back(Args.MakeArgString(Filename));
+CmdArgs.push_back(Args.MakeArgString(InputFile));
   }
-} else {
-  CmdArgs.push_back(Args.MakeArgString(InputFile));
+  continue;
+} else if (!II.isNothing()) {
+  // This option is commonly passed by LLVM by default, but isn't supported
+  // by nvlink.
+  if (llvm::any_of(II.getInputArg().getValues(), [](StringRef Arg) {
+return Arg.equals("--color-diagnostics");
+  }))
+continue;
+  // Render any remaining arguments as input to nvlink.
+  II.getInputArg().renderAsInput(Args, CmdArgs);
 }
   }
 


Index: clang/test/Driver/cuda-cross-compiling.c
===
--- clang/test/Driver/cuda-cross-compiling.c
+++ clang/test/Driver/cuda-cross-compiling.c
@@ -77,3 +77,12 @@
 // RUN:   | FileCheck -check-prefix=LOWERING %s
 
 // LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
+
+//
+// Test passing arguments directly to nvlink.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -Wl,-v -Wl,--color-diagnostics -### %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=LINKER-ARGS %s
+
+// LINKER-ARGS: nvlink{{.*}}"-v"
+// LINKER-ARGS-NOT: nvlink{{.*}}"--color-diagnostics"
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===

[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D149978#4323221, @tra wrote:

> In D149978#4323210, @jhuber6 wrote:
>
>> Somewhat annoying, I've discovered that LLVM adds `-Wl,-fcolor-diagnostics` 
>> which obviously isn't supported by `nvlink` so it fails while including this 
>> in `libc`'s CMake. Any clue if there's a way to work around that?
>
> I guess the options are to either filter out the automatically added option 
> or to avoid adding that particular argument if we know that the target is 
> NVPTX. The latter would probably be preferable as there would be only one 
> place where the decision is made.

The latter is a little difficult, the logic adds it based off of the host 
linker, but we explicitly override the host triple when we build via 
`--target=`. So there'd be no way to turn it off in LLVM unless it's a blanket 
check on building `libc`. And since it's a global flag I can't just disable it 
only for the target. So I think the options are, either filter it out manually 
here or make a new flag called `-Xcuda-nvlink`, which I wouldn't like to do.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149978/new/

https://reviews.llvm.org/D149978

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

The main reason I made this patch was to allow passing 
`--suppress-stack-size-warning` to `nvlink`. But it turns out it's a little 
more difficult there.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149978/new/

https://reviews.llvm.org/D149978

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

Somewhat annoyingly, I've discovered that LLVM adds `-Wl,-fcolor-diagnostics`, 
which obviously isn't supported by `nvlink`, so it fails while including this in 
`libc`'s CMake. Any clue if there's a way to work around that?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149978/new/

https://reviews.llvm.org/D149978

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 519957.
jhuber6 added a comment.

Addressing comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149978/new/

https://reviews.llvm.org/D149978

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/cuda-cross-compiling.c


Index: clang/test/Driver/cuda-cross-compiling.c
===
--- clang/test/Driver/cuda-cross-compiling.c
+++ clang/test/Driver/cuda-cross-compiling.c
@@ -77,3 +77,11 @@
 // RUN:   | FileCheck -check-prefix=LOWERING %s
 
 // LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
+
+//
+// Test passing arguments directly to nvlink.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -Wl,-v -### %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=LINKER-ARGS %s
+
+// LINKER-ARGS: nvlink{{.*}}"-v"
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -607,36 +607,37 @@
   continue;
 }
 
-// Currently, we only pass the input files to the linker, we do not pass
-// any libraries that may be valid only for the host.
-if (!II.isFilename())
-  continue;
-
 // The 'nvlink' application performs RDC-mode linking when given a '.o'
 // file and device linking when given a '.cubin' file. We always want to
 // perform device linking, so just rename any '.o' files.
 // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
-auto InputFile = getToolChain().getInputFilename(II);
-if (llvm::sys::path::extension(InputFile) != ".cubin") {
-  // If there are no actions above this one then this is direct input and we
-  // can copy it. Otherwise the input is internal so a `.cubin` file should
-  // exist.
-  if (II.getAction() && II.getAction()->getInputs().size() == 0) {
-const char *CubinF =
-Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
-llvm::sys::path::stem(InputFile), "cubin"));
-if (std::error_code EC =
-llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
-  continue;
+if (II.isFilename()) {
+  auto InputFile = getToolChain().getInputFilename(II);
+  if (llvm::sys::path::extension(InputFile) != ".cubin") {
+// If there are no actions above this one then this is direct input and
+// we can copy it. Otherwise the input is internal so a `.cubin` file
+// should exist.
+if (II.getAction() && II.getAction()->getInputs().size() == 0) {
+  const char *CubinF =
+  Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
+  llvm::sys::path::stem(InputFile), "cubin"));
+  if (std::error_code EC =
+  llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
+continue;
 
-CmdArgs.push_back(CubinF);
+  CmdArgs.push_back(CubinF);
+} else {
+  SmallString<256> Filename(InputFile);
+  llvm::sys::path::replace_extension(Filename, "cubin");
+  CmdArgs.push_back(Args.MakeArgString(Filename));
+}
   } else {
-SmallString<256> Filename(InputFile);
-llvm::sys::path::replace_extension(Filename, "cubin");
-CmdArgs.push_back(Args.MakeArgString(Filename));
+CmdArgs.push_back(Args.MakeArgString(InputFile));
   }
-} else {
-  CmdArgs.push_back(Args.MakeArgString(InputFile));
+  continue;
+} else if (!II.isNothing()) {
+  // Render any remaining arguments as input to nvlink.
+  II.getInputArg().renderAsInput(Args, CmdArgs);
 }
   }
 


Index: clang/test/Driver/cuda-cross-compiling.c
===
--- clang/test/Driver/cuda-cross-compiling.c
+++ clang/test/Driver/cuda-cross-compiling.c
@@ -77,3 +77,11 @@
 // RUN:   | FileCheck -check-prefix=LOWERING %s
 
 // LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
+
+//
+// Test passing arguments directly to nvlink.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -Wl,-v -### %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=LINKER-ARGS %s
+
+// LINKER-ARGS: nvlink{{.*}}"-v"
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -607,36 +607,37 @@
   continue;
 }
 
-// Currently, we only pass the input files to the linker, we do not pass
-// any libraries that may be valid only for the host.
-if (!II.isFilename())
-  continue;
-
 // The 'nvlink' application performs RDC-mode linking when given a '.o'
 // file and device linking when given a '.cubin' file. We always want to
 

[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 marked an inline comment as done.
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Cuda.cpp:594
-  // Add paths specified in LIBRARY_PATH environment variable as -L options.
-  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
-

tra wrote:
> Is removal of this line intentional? 
No, thanks. That was left over from when I originally tried to use 
`AddLinkerInput`, but it didn't work because of the `cubin` handling.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149978/new/

https://reviews.llvm.org/D149978

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149978: [Clang][NVPTX] Allow passing arguments to the linker while standalone

2023-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, tra, yaxunl, MaskRay.
Herald added subscribers: mattd, gchakrabarti, asavonic.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

We support standalone compilation for the NVPTX architecture using
'nvlink' as our linker. Because of the special handling required to
transform input files to cubins, as nvlink expects for some reason, we
didn't use the standard `AddLinkerInput` method. However, this also
meant that we weren't forwarding options passed with `-Wl` to the
linker. Add this support in for the standalone toolchain path.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D149978

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/cuda-cross-compiling.c


Index: clang/test/Driver/cuda-cross-compiling.c
===
--- clang/test/Driver/cuda-cross-compiling.c
+++ clang/test/Driver/cuda-cross-compiling.c
@@ -77,3 +77,11 @@
 // RUN:   | FileCheck -check-prefix=LOWERING %s
 
 // LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
+
+//
+// Test passing arguments directly to nvlink.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -Wl,-v -### %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=LINKER-ARGS %s
+
+// LINKER-ARGS: nvlink{{.*}}"-v"
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -590,9 +590,6 @@
   CmdArgs.push_back("-arch");
   CmdArgs.push_back(Args.MakeArgString(GPUArch));
 
-  // Add paths specified in LIBRARY_PATH environment variable as -L options.
-  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
-
   // Add paths for the default clang library path.
   SmallString<256> DefaultLibPath =
   llvm::sys::path::parent_path(TC.getDriver().Dir);
@@ -607,37 +604,42 @@
   continue;
 }
 
-// Currently, we only pass the input files to the linker, we do not pass
-// any libraries that may be valid only for the host.
-if (!II.isFilename())
-  continue;
-
 // The 'nvlink' application performs RDC-mode linking when given a '.o'
 // file and device linking when given a '.cubin' file. We always want to
 // perform device linking, so just rename any '.o' files.
 // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
-auto InputFile = getToolChain().getInputFilename(II);
-if (llvm::sys::path::extension(InputFile) != ".cubin") {
-  // If there are no actions above this one then this is direct input and we
-  // can copy it. Otherwise the input is internal so a `.cubin` file should
-  // exist.
-  if (II.getAction() && II.getAction()->getInputs().size() == 0) {
-const char *CubinF =
-Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
-llvm::sys::path::stem(InputFile), "cubin"));
-if (std::error_code EC =
-llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
-  continue;
+if (II.isFilename()) {
+  auto InputFile = getToolChain().getInputFilename(II);
+  if (llvm::sys::path::extension(InputFile) != ".cubin") {
+// If there are no actions above this one then this is direct input and
+// we can copy it. Otherwise the input is internal so a `.cubin` file
+// should exist.
+if (II.getAction() && II.getAction()->getInputs().size() == 0) {
+  const char *CubinF =
+  Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
+  llvm::sys::path::stem(InputFile), "cubin"));
+  if (std::error_code EC =
+  llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
+continue;
 
-CmdArgs.push_back(CubinF);
+  CmdArgs.push_back(CubinF);
+} else {
+  SmallString<256> Filename(InputFile);
+  llvm::sys::path::replace_extension(Filename, "cubin");
+  CmdArgs.push_back(Args.MakeArgString(Filename));
+}
   } else {
-SmallString<256> Filename(InputFile);
-llvm::sys::path::replace_extension(Filename, "cubin");
-CmdArgs.push_back(Args.MakeArgString(Filename));
+CmdArgs.push_back(Args.MakeArgString(InputFile));
   }
-} else {
-  CmdArgs.push_back(Args.MakeArgString(InputFile));
+  continue;
 }
+
+// In some error cases, the input could be Nothing; skip those.
+if (II.isNothing())
+  continue;
+
+// Render any remaining arguments as input to nvlink.
+II.getInputArg().renderAsInput(Args, CmdArgs);
   }
 
   C.addCommand(std::make_unique<Command>(


Index: clang/test/Driver/cuda-cross-compiling.c
===
--- clang/test/Driv

[PATCH] D149451: [NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors

2023-05-04 Thread Joseph Huber via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf05ce9045af4: [NVPTX] Add NVPTXCtorDtorLoweringPass to 
handle global ctors / dtors (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D149451?vs=518604&id=519448#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149451/new/

https://reviews.llvm.org/D149451

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/Cuda.h
  clang/test/Driver/cuda-cross-compiling.c
  llvm/lib/Target/NVPTX/CMakeLists.txt
  llvm/lib/Target/NVPTX/NVPTX.h
  llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
  llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
  llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

Index: llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
===
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor \
+; RUN: -nvptx-lower-global-ctor-dtor-id=unique_id < %s | FileCheck %s --check-prefix=GLOBAL
+
+; Make sure we get the same result if we run multiple times
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
+
+@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
+@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
+
+; CHECK-NOT: @llvm.global_ctors
+; CHECK-NOT: @llvm.global_dtors
+
+; CHECK: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; CHECK: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_[[HASH]]_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_[[HASH]]_1 to ptr)], section "llvm.metadata"
+; GLOBAL: @__init_array_object_foo_unique_id_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; GLOBAL: @__fini_array_object_bar_unique_id_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; GLOBAL: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_unique_id_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_unique_id_1 to ptr)], section "llvm.metadata"
+
+; VISIBILITY: .visible .const .align 8 .u64 __init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = foo;
+; VISIBILITY: .visible .const .align 8 .u64 __fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = bar;
+
+define internal void @foo() {
+  ret void
+}
+
+define internal void @bar() {
+  ret void
+}
Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
===
--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "NVPTXAliasAnalysis.h"
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXAtomicLower.h"
+#include "NVPTXCtorDtorLowering.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXTargetObjectFile.h"
@@ -68,8 +69,10 @@
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXAtomicLowerPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
 void initializeNVVMIntrRangePass(PassRegistry &);
@@ -95,6 +98,7 @@
   initializeNVPTXAtomicLowerPass(PR);
   initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
+  initializeNVPTXCtorDtorLoweringLegacyPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
   initializeNVPTXProxyRegErasurePass(PR);
   initializeNVPTXDAGToDAGISelPass(PR);
@@ -249,6 +253,10 @@
   PB.registerPipelineParsingCallback(
   [](StringRef PassName, ModulePassManager &PM,
  ArrayRef<PassBuilder::PipelineElement>) {
+if (PassName == "nvptx-lower-ctor-dtor") {
+  PM.addPass(NVPTXCtorDtorLoweringPass());
+  return true;
+}
 if (PassName == "generic-to-nvvm") {
   PM.addPass(Gen

[PATCH] D149451: [NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors

2023-05-01 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 518604.
jhuber6 added a comment.

Add option to allow overriding the global hash.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149451/new/

https://reviews.llvm.org/D149451

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/Cuda.h
  clang/test/Driver/cuda-cross-compiling.c
  llvm/lib/Target/NVPTX/CMakeLists.txt
  llvm/lib/Target/NVPTX/NVPTX.h
  llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
  llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
  llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

Index: llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
===
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor \
+; RUN: -nvptx-lower-global-ctor-dtor-id=unique_id < %s | FileCheck %s --check-prefix=GLOBAL
+
+; Make sure we get the same result if we run multiple times
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
+
+@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
+@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
+
+; CHECK-NOT: @llvm.global_ctors
+; CHECK-NOT: @llvm.global_dtors
+
+; CHECK: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; CHECK: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_[[HASH]]_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_[[HASH]]_1 to ptr)], section "llvm.metadata"
+; GLOBAL: @__init_array_object_foo_unique_id_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; GLOBAL: @__fini_array_object_bar_unique_id_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; GLOBAL: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_unique_id_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_unique_id_1 to ptr)], section "llvm.metadata"
+
+; VISIBILITY: .visible .const .align 8 .u64 __init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = foo;
+; VISIBILITY: .visible .const .align 8 .u64 __fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = bar;
+
+define internal void @foo() {
+  ret void
+}
+
+define internal void @bar() {
+  ret void
+}
Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
===
--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "NVPTXAliasAnalysis.h"
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXAtomicLower.h"
+#include "NVPTXCtorDtorLowering.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXTargetObjectFile.h"
@@ -68,8 +69,10 @@
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXAtomicLowerPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
 void initializeNVVMIntrRangePass(PassRegistry &);
@@ -95,6 +98,7 @@
   initializeNVPTXAtomicLowerPass(PR);
   initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
+  initializeNVPTXCtorDtorLoweringLegacyPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
   initializeNVPTXProxyRegErasurePass(PR);
   initializeNVPTXDAGToDAGISelPass(PR);
@@ -249,6 +253,10 @@
   PB.registerPipelineParsingCallback(
   [](StringRef PassName, ModulePassManager &PM,
  ArrayRef<PassBuilder::PipelineElement>) {
+if (PassName == "nvptx-lower-ctor-dtor") {
+  PM.addPass(NVPTXCtorDtorLoweringPass());
+  return true;
+}
 if (PassName == "generic-to-nvvm") {
   PM.addPass(GenericToNVVMPass());
   return true;
@@ -369,6 +377,7 @@
   }
 
   addPass(createAtomicExpandPass());
+  addPass(createNVPTXCtorDtorLoweringLegacyPass());
 
 

[PATCH] D149451: [NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors

2023-05-01 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp:58
+((IsCtor ? "__init_array_object_" : "__fini_array_object_") +
+ F->getName() + "_" + getHash(M.getName()) + "_" +
+ std::to_string(Priority))

tra wrote:
> tra wrote:
> > jhuber6 wrote:
> > > tra wrote:
> > > > Source file name may be a little bit better, though it's still easy to 
> > > > clash if someone does `cd A; clang ./foo.c; cd ../B; clang ./foo.c` and 
> > > > the file name uses relative paths.
> > > > 
> > > > I think we'll need a way to override this unique suffix explicitly as 
> > > > an escape hatch for cases where someone runs into a clash.
> > > I figured it'd be good enough since this is admittedly *very* niche: 
> > > someone would need to have a file called `foo.c` that also had a 
> > > constructor called `foo` in it for the names to clash. Isn't it too late 
> > > to grab the source filename while we're in the backend lowering stage?
> > > someone would need to have a file called foo.c that also had a 
> > > constructor called foo in it
> > 
> > Unlikely != impossible.  It's a trade-off between the hassle of 
> > implementing the plan B and the hassle of debugging and working around the 
> > clash for someone who runs into this.
> > On one hand that's indeed unlikely to happen, but, given enough exposure, 
> > someone/somewhere will run into it and they will likely be ill-equipped to 
> > even tell what's going on.
> > In general, compiler options are logistically much easier to deal with 
> > compared to having to change the source code.
> > 
> > > Isn't it too late to grab the source filename while we're in the backend 
> > > lowering stage?
> > 
> > The module already has the file name recorded and available via 
> > `llvm::Module::getSourceFileName()`, so it's as easy to get as the module 
> > name.
> On second thought, do you think we'll ever end up running this pass with 
> a module created purely in memory without a source file name, or perhaps 
> even without a module name?
> 
> Even the hash of the IR itself will not be sufficient. Users are allowed to 
> compile and link completely identical TUs as long as they don't have 
> conflicting names.  I can imagine some sort of "plugin" module with only 
> private symbols, but which has initializers to register stuff on startup. Two 
> identical instances of such a module should be able to work, but they would 
> end up with identical hash in this scheme. I do not see any way to 
> automatically disambiguate them, short of using random numbers, but that 
> would make compilation results unstable.
> 
> I still think we need to be able to provide the uniquifier manually via an 
> option.
> 
> 
Yeah, I'm assuming they would just get a name conflict in that case. We can 
definitely add a special option that just adds some noise.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149451/new/

https://reviews.llvm.org/D149451

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149451: [NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors

2023-05-01 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 518553.
jhuber6 added a comment.

Changing to use source filename.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149451/new/

https://reviews.llvm.org/D149451

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/Cuda.h
  clang/test/Driver/cuda-cross-compiling.c
  llvm/lib/Target/NVPTX/CMakeLists.txt
  llvm/lib/Target/NVPTX/NVPTX.h
  llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
  llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
  llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

Index: llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
===
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s
+
+; Make sure we get the same result if we run multiple times
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
+
+@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
+@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
+
+; CHECK-NOT: @llvm.global_ctors
+; CHECK-NOT: @llvm.global_dtors
+
+; CHECK: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; CHECK: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_[[HASH]]_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_[[HASH]]_1 to ptr)], section "llvm.metadata"
+
+; VISIBILITY: .visible .const .align 8 .u64 __init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = foo;
+; VISIBILITY: .visible .const .align 8 .u64 __fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = bar;
+
+define internal void @foo() {
+  ret void
+}
+
+define internal void @bar() {
+  ret void
+}
Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
===
--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "NVPTXAliasAnalysis.h"
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXAtomicLower.h"
+#include "NVPTXCtorDtorLowering.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXTargetObjectFile.h"
@@ -68,8 +69,10 @@
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXAtomicLowerPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
 void initializeNVVMIntrRangePass(PassRegistry &);
@@ -95,6 +98,7 @@
   initializeNVPTXAtomicLowerPass(PR);
   initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
+  initializeNVPTXCtorDtorLoweringLegacyPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
   initializeNVPTXProxyRegErasurePass(PR);
   initializeNVPTXDAGToDAGISelPass(PR);
@@ -249,6 +253,10 @@
   PB.registerPipelineParsingCallback(
   [](StringRef PassName, ModulePassManager &PM,
  ArrayRef<PassBuilder::PipelineElement>) {
+if (PassName == "nvptx-lower-ctor-dtor") {
+  PM.addPass(NVPTXCtorDtorLoweringPass());
+  return true;
+}
 if (PassName == "generic-to-nvvm") {
   PM.addPass(GenericToNVVMPass());
   return true;
@@ -369,6 +377,7 @@
   }
 
   addPass(createAtomicExpandPass());
+  addPass(createNVPTXCtorDtorLoweringLegacyPass());
 
   // === LSR and other generic IR passes ===
   TargetPassConfig::addIRPasses();
Index: llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
===
--- /dev/null
+++ llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
@@ -0,0 +1,30 @@
+//===-- NVPTXCtorDtorLowering.h *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-

[PATCH] D149451: [NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors

2023-05-01 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp:58
+((IsCtor ? "__init_array_object_" : "__fini_array_object_") +
+ F->getName() + "_" + getHash(M.getName()) + "_" +
+ std::to_string(Priority))

tra wrote:
> Source file name may be a little bit better, though it's still easy to clash 
> if someone does `cd A; clang ./foo.c; cd ../B; clang ./foo.c` and the file 
> name uses relative paths.
> 
> I think we'll need a way to override this unique suffix explicitly as an 
> escape hatch for cases where someone runs into a clash.
I figured it'd be good enough since this is admittedly *very* niche: someone 
would need to have a file called `foo.c` that also had a constructor called 
`foo` in it for the names to clash. Isn't it too late to grab the source 
filename while we're in the backend lowering stage?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149451/new/

https://reviews.llvm.org/D149451

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149451: [NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors

2023-04-29 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 518262.
jhuber6 added a comment.
Herald added subscribers: cfe-commits, MaskRay.
Herald added a project: clang.

Update to only enable this when in "freestanding" mode. Also add a hash based 
on the module name to the global.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149451/new/

https://reviews.llvm.org/D149451

Files:
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/Cuda.h
  clang/test/Driver/cuda-cross-compiling.c
  llvm/lib/Target/NVPTX/CMakeLists.txt
  llvm/lib/Target/NVPTX/NVPTX.h
  llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
  llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
  llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
  llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

Index: llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
===
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s
+
+; Make sure we get the same result if we run multiple times
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
+
+@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
+@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
+
+; CHECK-NOT: @llvm.global_ctors
+; CHECK-NOT: @llvm.global_dtors
+
+; CHECK: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; CHECK: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_[[HASH]]_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_[[HASH]]_1 to ptr)], section "llvm.metadata"
+
+; VISIBILITY: .visible .const .align 8 .u64 __init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = foo;
+; VISIBILITY: .visible .const .align 8 .u64 __fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = bar;
+
+define internal void @foo() {
+  ret void
+}
+
+define internal void @bar() {
+  ret void
+}
Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
===
--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "NVPTXAliasAnalysis.h"
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXAtomicLower.h"
+#include "NVPTXCtorDtorLowering.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXTargetObjectFile.h"
@@ -68,8 +69,10 @@
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXAtomicLowerPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
 void initializeNVVMIntrRangePass(PassRegistry &);
@@ -95,6 +98,7 @@
   initializeNVPTXAtomicLowerPass(PR);
   initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
+  initializeNVPTXCtorDtorLoweringLegacyPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
   initializeNVPTXProxyRegErasurePass(PR);
   initializeNVPTXDAGToDAGISelPass(PR);
@@ -249,6 +253,10 @@
   PB.registerPipelineParsingCallback(
   [](StringRef PassName, ModulePassManager &PM,
  ArrayRef) {
+if (PassName == "nvptx-lower-ctor-dtor") {
+  PM.addPass(NVPTXCtorDtorLoweringPass());
+  return true;
+}
 if (PassName == "generic-to-nvvm") {
   PM.addPass(GenericToNVVMPass());
   return true;
@@ -369,6 +377,7 @@
   }
 
   addPass(createAtomicExpandPass());
+  addPass(createNVPTXCtorDtorLoweringLegacyPass());
 
   // === LSR and other generic IR passes ===
   TargetPassConfig::addIRPasses();
Index: llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
===
--- /dev/null
+++ llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
@@ -0,0 +1,30 @@
+//===-- NVPTXCtorDtorLowering.h *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for l

[PATCH] D149019: [Clang] Accept and forward `-fconvergent-functions` in the driver

2023-04-24 Thread Joseph Huber via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf66576016290: [Clang] Accept and forward 
`-fconvergent-functions` in the driver (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D149019?vs=516188&id=516400#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149019/new/

https://reviews.llvm.org/D149019

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto %s 2>&1 | FileCheck -check-prefix=LTO %s
-// LTO: clang{{.*}} "-flto=full"
+// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
 // LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3731,9 +3731,9 @@
   Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL
 && Opts.OpenCLVersion == 200);
 
-  Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
- Opts.SYCLIsDevice ||
- Args.hasArg(OPT_fconvergent_functions);
+  Opts.ConvergentFunctions = Args.hasArg(OPT_fconvergent_functions) ||
+ Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
+ Opts.SYCLIsDevice;
 
   Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;
   if (!Opts.NoBuiltin)
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5700,6 +5700,8 @@
 options::OPT_fno_unique_internal_linkage_names);
   Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
 options::OPT_fno_unique_basic_block_section_names);
+  Args.addOptInFlag(CmdArgs, options::OPT_fconvergent_functions,
+options::OPT_fno_convergent_functions);
 
   if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -967,8 +967,10 @@
   MetaVarName<"">;
 def c : Flag<["-"], "c">, Flags<[NoXarchOption, FlangOption]>, 
Group,
   HelpText<"Only run preprocess, compile, and assemble steps">;
-def fconvergent_functions : Flag<["-"], "fconvergent-functions">, 
Group, Flags<[CC1Option]>,
-  HelpText<"Assume functions may be convergent">;
+defm convergent_functions : BoolFOption<"convergent-functions",
+  LangOpts<"ConvergentFunctions">, DefaultFalse,
+  NegFlag,
+  PosFlag>;
 
 def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
   InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto %s 2>&1 | FileCheck -check-prefix=LTO %s
-// LTO: clang{{.*}} "-flto=full"
+// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
 // LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3731,9 +3731,9 @@
   Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL
 && Opts.OpenCLVersion == 200);
 
-  Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
- Opts.SYCLIsDevice ||
- Args.hasArg(OPT_fconvergent_functions);
+  Opts.ConvergentFunctions = Args.hasArg(OPT_fconvergent_functions) ||
+ Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
+ Opts.SYCLIsDevice;
 
   Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;
   if (!Opts.NoBuiltin)
Index: c

[PATCH] D149028: [Clang] Always pass `-fconvergent-functions` for GPU targets

2023-04-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D149028#4290831, @JonChesterfield wrote:

> I think this is sensible. Passing fno-convergent-functions presumably changes 
> the default?
>
> I wonder if we should adopt this and then remove the checks for each of the 
> GPU programming models

That would probably make sense; that's a good point. MaskRay wanted me to
remove `-fno-convergent-functions` in the previous patch, which prevents us
from doing that. I think I'll add it back in because I like the idea of being
able to override defaults.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149028/new/

https://reviews.llvm.org/D149028

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149028: [Clang] Always pass `-fconvergent-functions` for GPU targets

2023-04-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 516198.
jhuber6 added a comment.
Herald added subscribers: mattd, asavonic.

Add test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149028/new/

https://reviews.llvm.org/D149028

Files:
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CodeGen/nvptx_attributes.c


Index: clang/test/CodeGen/nvptx_attributes.c
===
--- clang/test/CodeGen/nvptx_attributes.c
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --function-signature --check-attributes --check-globals
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s 
-o - | FileCheck %s
 
-// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK: Function Attrs: convergent noinline nounwind optnone
 // CHECK-LABEL: define {{[^@]+}}@foo
 // CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3737,7 +3737,7 @@
 && Opts.OpenCLVersion == 200);
 
   Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
- Opts.SYCLIsDevice ||
+ Opts.SYCLIsDevice || T.isNVPTX() || T.isAMDGPU() 
||
  Args.hasArg(OPT_fconvergent_functions);
 
   Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;


Index: clang/test/CodeGen/nvptx_attributes.c
===
--- clang/test/CodeGen/nvptx_attributes.c
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
 
-// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK: Function Attrs: convergent noinline nounwind optnone
 // CHECK-LABEL: define {{[^@]+}}@foo
 // CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3737,7 +3737,7 @@
 && Opts.OpenCLVersion == 200);
 
   Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
- Opts.SYCLIsDevice ||
+ Opts.SYCLIsDevice || T.isNVPTX() || T.isAMDGPU() ||
  Args.hasArg(OPT_fconvergent_functions);
 
   Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149028: [Clang] Always pass `-fconvergent-functions` for GPU targets

2023-04-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, jdoerfert, tianshilei1992, tra, 
yaxunl.
Herald added a subscriber: kosarev.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

GPU targets like `nvptx64-nvidia-cuda` and `amdgcn-amd-amdhsa` require
convergent functions to be enabled. Currently we enable this for all the
offloading languages, but this patch enables it based on the triple
directly. This is so users can specify `--target=nvptx64-nvidia-cuda`
and get this behaviour as an implementation detail of the architecture.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D149028

Files:
  clang/lib/Frontend/CompilerInvocation.cpp


Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3737,7 +3737,7 @@
 && Opts.OpenCLVersion == 200);
 
   Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
- Opts.SYCLIsDevice ||
+ Opts.SYCLIsDevice || T.isNVPTX() || T.isAMDGPU() 
||
  Args.hasArg(OPT_fconvergent_functions);
 
   Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;


Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3737,7 +3737,7 @@
 && Opts.OpenCLVersion == 200);
 
   Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
- Opts.SYCLIsDevice ||
+ Opts.SYCLIsDevice || T.isNVPTX() || T.isAMDGPU() ||
  Args.hasArg(OPT_fconvergent_functions);
 
   Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149019: [Clang] Accept and forward `-fconvergent-functions` in the driver

2023-04-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 516188.
jhuber6 added a comment.

Remove cc1 negative option


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149019/new/

https://reviews.llvm.org/D149019

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto %s 2>&1 | FileCheck -check-prefix=LTO %s
-// LTO: clang{{.*}} "-flto=full"
+// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions" 
 // LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5700,6 +5700,8 @@
 options::OPT_fno_unique_internal_linkage_names);
   Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
 options::OPT_fno_unique_basic_block_section_names);
+  Args.addOptInFlag(CmdArgs, options::OPT_fconvergent_functions,
+options::OPT_fno_convergent_functions);
 
   if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -967,8 +967,10 @@
   MetaVarName<"">;
 def c : Flag<["-"], "c">, Flags<[NoXarchOption, FlangOption]>, 
Group,
   HelpText<"Only run preprocess, compile, and assemble steps">;
-def fconvergent_functions : Flag<["-"], "fconvergent-functions">, 
Group, Flags<[CC1Option]>,
-  HelpText<"Assume functions may be convergent">;
+defm convergent_functions : BoolFOption<"convergent-functions",
+  LangOpts<"ConvergentFunctions">, DefaultFalse,
+  NegFlag,
+  PosFlag>;
 
 def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
   InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto %s 2>&1 | FileCheck -check-prefix=LTO %s
-// LTO: clang{{.*}} "-flto=full"
+// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions" 
 // LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5700,6 +5700,8 @@
 options::OPT_fno_unique_internal_linkage_names);
   Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
 options::OPT_fno_unique_basic_block_section_names);
+  Args.addOptInFlag(CmdArgs, options::OPT_fconvergent_functions,
+options::OPT_fno_convergent_functions);
 
   if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -967,8 +967,10 @@
   MetaVarName<"">;
 def c : Flag<["-"], "c">, Flags<[NoXarchOption, FlangOption]>, Group,
   HelpText<"Only run preprocess, compile, and assemble steps">;
-def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group, Flags<[CC1Option]>,
-  HelpText<"Assume functions may be convergent">;
+defm convergent_functions : BoolFOption<"convergent-functions",
+  LangOpts<"ConvergentFunctions">, DefaultFalse,
+  NegFlag,
+  PosFlag>;
 
 def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
   InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149019: [Clang] Accept and forward `-fconvergent-functions` in the driver

2023-04-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 516167.
jhuber6 added a comment.

Fix formatting


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149019/new/

https://reviews.llvm.org/D149019

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto %s 2>&1 | FileCheck -check-prefix=LTO %s
-// LTO: clang{{.*}} "-flto=full"
+// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions" 
 // LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5700,6 +5700,8 @@
 options::OPT_fno_unique_internal_linkage_names);
   Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
 options::OPT_fno_unique_basic_block_section_names);
+  Args.addOptInFlag(CmdArgs, options::OPT_fconvergent_functions,
+options::OPT_fno_convergent_functions);
 
   if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -967,8 +967,10 @@
   MetaVarName<"">;
 def c : Flag<["-"], "c">, Flags<[NoXarchOption, FlangOption]>, 
Group,
   HelpText<"Only run preprocess, compile, and assemble steps">;
-def fconvergent_functions : Flag<["-"], "fconvergent-functions">, 
Group, Flags<[CC1Option]>,
-  HelpText<"Assume functions may be convergent">;
+defm convergent_functions : BoolFOption<"convergent-functions",
+  LangOpts<"ConvergentFunctions">, DefaultFalse,
+  NegFlag,
+  PosFlag>;
 
 def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
   InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto %s 2>&1 | FileCheck -check-prefix=LTO %s
-// LTO: clang{{.*}} "-flto=full"
+// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions" 
 // LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5700,6 +5700,8 @@
 options::OPT_fno_unique_internal_linkage_names);
   Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
 options::OPT_fno_unique_basic_block_section_names);
+  Args.addOptInFlag(CmdArgs, options::OPT_fconvergent_functions,
+options::OPT_fno_convergent_functions);
 
   if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -967,8 +967,10 @@
   MetaVarName<"">;
 def c : Flag<["-"], "c">, Flags<[NoXarchOption, FlangOption]>, Group,
   HelpText<"Only run preprocess, compile, and assemble steps">;
-def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group, Flags<[CC1Option]>,
-  HelpText<"Assume functions may be convergent">;
+defm convergent_functions : BoolFOption<"convergent-functions",
+  LangOpts<"ConvergentFunctions">, DefaultFalse,
+  NegFlag,
+  PosFlag>;
 
 def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
   InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149019: [Clang] Accept and forward `-fconvergent-functions` in the driver

2023-04-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D149019#4290573, @JonChesterfield wrote:

> Can't reasonably see the semantic change between all the whitespace reformat, 
> please split those two. E.g. use git-clang-format to only fix formatting in 
> the part you're changing, or commit a clang-format only change first and then 
> rebase this.

Sorry, that was an accidental reformat; I'll fix it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149019/new/

https://reviews.llvm.org/D149019

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D149019: [Clang] Accept and forward `-fconvergent-functions` in the driver

2023-04-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, tra, yaxunl, tianshilei1992, MaskRay, 
jdoerfert.
Herald added subscribers: kosarev, abrachet, phosek, kerbowa, s.egerton, 
simoncook, asb, jvesely.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, pcwang-thead.
Herald added a project: clang.

Currently the `-fconvergent-functions` option is primarily used by GPU
toolchains to enforce convergent operations in line with the semantics.
This option previously was only supported via `-Xclang` and would show
up as unused if passed to the driver. This patch allows the driver to
forward it. This is mostly useful for users wishing to target GPU
toolchains directly via `--target=` without an offloading runtime.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D149019

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-toolchain.c

Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,6 @@
 // DWARF_VER: "-dwarf-version=5"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -flto %s 2>&1 | FileCheck -check-prefix=LTO %s
-// LTO: clang{{.*}} "-flto=full"
+// RUN:   -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions" 
 // LTO: ld.lld{{.*}}-plugin-opt=mcpu=gfx906
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -82,8 +82,8 @@
   if (Args.hasArg(options::OPT_static))
 if (const Arg *A =
 Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
-  D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
-  << "-static";
+  D.Diag(diag::err_drv_argument_not_allowed_with)
+  << A->getAsString(Args) << "-static";
 }
 
 // Add backslashes to escape spaces and other backslashes.
@@ -140,8 +140,8 @@
 /// parameter in reciprocal argument strings. Return false if there is an error
 /// parsing the refinement step. Otherwise, return true and set the Position
 /// of the refinement step in the input string.
-static bool getRefinementStep(StringRef In, const Driver &D,
-  const Arg &A, size_t &Position) {
+static bool getRefinementStep(StringRef In, const Driver &D, const Arg &A,
+  size_t &Position) {
   const char RefinementStepToken = ':';
   Position = In.find(RefinementStepToken);
   if (Position != StringRef::npos) {
@@ -260,7 +260,8 @@
 
 // If the precision was not specified, also mark the double and half entry
 // as found.
-if (ValBase.back() != 'f' && ValBase.back() != 'd' && ValBase.back() != 'h') {
+if (ValBase.back() != 'f' && ValBase.back() != 'd' &&
+ValBase.back() != 'h') {
   OptionStrings[ValBase.str() + 'd'] = true;
   OptionStrings[ValBase.str() + 'h'] = true;
 }
@@ -404,7 +405,7 @@
 }
 
 static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
-  switch (Triple.getArch()){
+  switch (Triple.getArch()) {
   default:
 return false;
   case llvm::Triple::arm:
@@ -615,7 +616,7 @@
 
 /// Add a CC1 and CC1AS option to specify the coverage file path prefix map.
 static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
-   ArgStringList &CmdArgs) {
+ArgStringList &CmdArgs) {
   for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
 options::OPT_fcoverage_prefix_map_EQ)) {
 StringRef Map = A->getValue();
@@ -711,13 +712,12 @@
   CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
 CSPGOGenerateArg = nullptr;
 
-  auto *ProfileGenerateArg = Args.getLastArg(
-  options::OPT_fprofile_instr_generate,
-  options::OPT_fprofile_instr_generate_EQ,
-  options::OPT_fno_profile_instr_generate);
-  if (ProfileGenerateArg &&
-  ProfileGenerateArg->getOption().matches(
-  options::OPT_fno_profile_instr_generate))
+  auto *ProfileGenerateArg =
+  Args.getLastArg(options::OPT_fprofile_instr_generate,
+  options::OPT_fprofile_instr_generate_EQ,
+  options::OPT_fno_profile_instr_generate);
+  if (ProfileGenerateArg && ProfileGenerateArg->getOption().matches(
+options::OPT_fno_profile_instr_generate))
 ProfileGenerateArg = nullptr;
 
   if (PGOGenerateArg && ProfileGenerateArg)
@@ -1166,15 +1166,15 @@
   (getToolChain().getTriple().isNVPTX() ||
getToolChain().ge

[PATCH] D148849: [OpenMP-OPT] Remove limit for heap to stack conversions of __kmpc_alloc_shared allocations

2023-04-20 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 accepted this revision.
jhuber6 added a comment.
This revision is now accepted and ready to land.

LG


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148849/new/

https://reviews.llvm.org/D148849

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147572: [Clang][OpenMP] Fix failure with team-wide allocated variable

2023-04-17 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 accepted this revision.
jhuber6 added a comment.
This revision is now accepted and ready to land.

LGTM unless anyone else has any concerns.




Comment at: clang/test/OpenMP/target_team_variable_codegen.cpp:33
+//.
+// CHECK-NVIDIA: @local_a = internal addrspace(3) global [10 x i32] 
zeroinitializer, align 4
+//.

doru1004 wrote:
> jhuber6 wrote:
> > jdoerfert wrote:
> > > doru1004 wrote:
> > > > jhuber6 wrote:
> > > > > Shouldn't the Nvidia version also be undefined? Not sure why this 
> > > > > should vary depending on the target.
> > > > Perhaps NVIDIA code path can tolerate a zeroinitializer? I don't want 
> > > > to change it if it's not needed. I am basing this check on the code 
> > > > path for AMD GPUs and the initial bug that was reported.
> > > for AS 3 we should make it always poison.
> > We should probably change this in `HeapToShared` in `OpenMPOpt` as well.
> Happy to remove the guard and have it always use poison for both NVIDIA and 
> AMD.
These should be a single check line now.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147572/new/

https://reviews.llvm.org/D147572

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148444: [clang-tidy] Prevent `llvmlibc-inline-function-decl` triggering on lambdas

2023-04-16 Thread Joseph Huber via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1663016b41d7: [clang-tidy] Prevent 
`llvmlibc-inline-function-decl` triggering on lambdas (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D148444?vs=514001&id=514026#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148444/new/

https://reviews.llvm.org/D148444

Files:
  clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp


Index: 
clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
===
--- clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
+++ clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
@@ -60,6 +60,14 @@
   }
 };
 
+LIBC_INLINE void lambda() {
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with 
the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be 
tagged with the LIBC_INLINE macro; the macro should be placed at the beginning 
of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' 
must be tagged with the LIBC_INLINE macro; the macro should be placed at the 
beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged 
with the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+  [](){};
+}
+
 } // namespace __llvm_libc
 
 #endif // 
LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_LLVMLIBC_INLINEFUNCTIONDECL_H
Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,12 @@
 HeaderFileExtensions))
 return;
 
+  // Ignore lambda functions as they are internal and implicit.
+  if (const auto *MethodDecl = dyn_cast(FuncDecl)) {
+if (MethodDecl->getParent()->isLambda())
+  return;
+  }
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);


Index: clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
===
--- clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
+++ clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
@@ -60,6 +60,14 @@
   }
 };
 
+LIBC_INLINE void lambda() {
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+  [](){};
+}
+
 } // namespace __llvm_libc
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_LLVMLIBC_INLINEFUNCTIONDECL_H
Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,12 @@
 HeaderFileExtensions))
 return;
 
+  // Ignore lambda functions as they are internal and implicit.
+  if (const auto *MethodDecl = dyn_cast(FuncDecl)) {
+if (MethodDecl->getParent()->isLambda())
+  return;
+  }
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148444: [clang-tidy] Prevent `llvmlibc-inline-function-decl` triggering on lambdas

2023-04-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 514001.
jhuber6 added a comment.

Rebasing on main


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148444/new/

https://reviews.llvm.org/D148444

Files:
  clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp


Index: 
clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
===
--- clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
+++ clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
@@ -60,6 +60,14 @@
   }
 };
 
+LIBC_INLINE void lambda() {
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with 
the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be 
tagged with the LIBC_INLINE macro; the macro should be placed at the beginning 
of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' 
must be tagged with the LIBC_INLINE macro; the macro should be placed at the 
beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged 
with the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+  [](){};
+}
+
 } // namespace __llvm_libc
 
 #endif // 
LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_LLVMLIBC_INLINEFUNCTIONDECL_H
Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,12 @@
 HeaderFileExtensions))
 return;
 
+  // Consider only functions with an external and visible declaration.
+  if (const auto *MethodDecl = dyn_cast(FuncDecl)) {
+if (MethodDecl->getParent()->isLambda())
+  return;
+  }
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);


Index: clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
===
--- clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
+++ clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
@@ -60,6 +60,14 @@
   }
 };
 
+LIBC_INLINE void lambda() {
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+  [](){};
+}
+
 } // namespace __llvm_libc
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_LLVMLIBC_INLINEFUNCTIONDECL_H
Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,12 @@
 HeaderFileExtensions))
 return;
 
+  // Consider only functions with an external and visible declaration.
+  if (const auto *MethodDecl = dyn_cast(FuncDecl)) {
+if (MethodDecl->getParent()->isLambda())
+  return;
+  }
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148444: [clang-tidy] Prevent `llvmlibc-inline-function-decl` triggering on lambdas

2023-04-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D148444#4272036, @PiotrZSL wrote:

> Fix Linux build before committing & resolve all comments.

The log says that it failed because of the CMake version. I don't think I can 
fix that.




Comment at: 
clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp:66
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be 
tagged with the LIBC_INLINE macro; the macro should be placed at the beginning 
of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' 
must be tagged with the LIBC_INLINE macro; the macro should be placed at the 
beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged 
with the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]

PiotrZSL wrote:
> NOTE: If this warning were printed then we got other issue in this check.
> Implicit functions are checked, would be good to exclude them:
> `functionDecl(unless(isImplicit()))`.
That was printed without this patch. Are you saying we should have a separate 
check for these types of functions?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148444/new/

https://reviews.llvm.org/D148444

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148444: [clang-tidy] Prevent `llvmlibc-inline-function-decl` triggering on lambdas

2023-04-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp:64-67
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with 
the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be 
tagged with the LIBC_INLINE macro; the macro should be placed at the beginning 
of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' 
must be tagged with the LIBC_INLINE macro; the macro should be placed at the 
beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged 
with the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]

carlosgalvezp wrote:
> Not needed, simply write a plain comment explaining why the check should not 
> warn here.
Since I've already written it I think we should be able to keep it. If you 
really don't like the extra check lines I can remove them.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148444/new/

https://reviews.llvm.org/D148444

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148444: [clang-tidy] Prevent `llvmlibc-inline-function-decl` triggering on lambdas

2023-04-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 513997.
jhuber6 added a comment.

Address nit


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148444/new/

https://reviews.llvm.org/D148444

Files:
  clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp


Index: 
clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
===
--- clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
+++ clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
@@ -60,6 +60,14 @@
   }
 };
 
+LIBC_INLINE void lambda() {
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with 
the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be 
tagged with the LIBC_INLINE macro; the macro should be placed at the beginning 
of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' 
must be tagged with the LIBC_INLINE macro; the macro should be placed at the 
beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged 
with the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+  [](){};
+}
+
 } // namespace __llvm_libc
 
 #endif // 
LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_LLVMLIBC_INLINEFUNCTIONDECL_H
Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,12 @@
 HeaderFileExtensions))
 return;
 
+  // Consider only functions with an external and visible declaration.
+  if (const auto *MethodDecl = dyn_cast(FuncDecl)) {
+if (MethodDecl->getParent()->isLambda())
+  return;
+  }
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);


Index: clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
===
--- clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
+++ clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
@@ -60,6 +60,14 @@
   }
 };
 
+LIBC_INLINE void lambda() {
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+  [](){};
+}
+
 } // namespace __llvm_libc
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_LLVMLIBC_INLINEFUNCTIONDECL_H
Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,12 @@
 HeaderFileExtensions))
 return;
 
+  // Consider only functions with an external and visible declaration.
+  if (const auto *MethodDecl = dyn_cast(FuncDecl)) {
+if (MethodDecl->getParent()->isLambda())
+  return;
+  }
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148444: [clang-tidy] Prevent `llvmlibc-inline-function-decl` triggering on lambdas

2023-04-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 513990.
jhuber6 added a comment.

Add test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148444/new/

https://reviews.llvm.org/D148444

Files:
  clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp


Index: 
clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
===
--- clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
+++ clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
@@ -60,6 +60,14 @@
   }
 };
 
+LIBC_INLINE void lambda() {
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with 
the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be 
tagged with the LIBC_INLINE macro; the macro should be placed at the beginning 
of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' 
must be tagged with the LIBC_INLINE macro; the macro should be placed at the 
beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged 
with the LIBC_INLINE macro; the macro should be placed at the beginning of the 
declaration [llvmlibc-inline-function-decl]
+  [](){};
+}
+
 } // namespace __llvm_libc
 
 #endif // 
LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_LLVMLIBC_INLINEFUNCTIONDECL_H
Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,11 @@
 HeaderFileExtensions))
 return;
 
+  // Consider only functions with an external and visible declaration.
+  if (const auto *MethodDecl = dyn_cast(FuncDecl))
+if (MethodDecl->getParent()->isLambda())
+  return;
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);


Index: clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
===
--- clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
+++ clang-tools-extra/test/clang-tidy/checkers/llvmlibc/inline-function-decl.hpp
@@ -60,6 +60,14 @@
   }
 };
 
+LIBC_INLINE void lambda() {
+// CHECK-MESSAGES-NOT: :[[@LINE+4]]:3: warning: '__invoke' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+3]]:3: warning: 'operator void (*)()' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+2]]:3: warning: '~(lambda at [[FILENAME:.+]])' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+// CHECK-MESSAGES-NOT: :[[@LINE+1]]:6: warning: 'operator()' must be tagged with the LIBC_INLINE macro; the macro should be placed at the beginning of the declaration [llvmlibc-inline-function-decl]
+  [](){};
+}
+
 } // namespace __llvm_libc
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_LLVMLIBC_INLINEFUNCTIONDECL_H
Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,11 @@
 HeaderFileExtensions))
 return;
 
+  // Consider only functions with an external and visible declaration.
+  if (const auto *MethodDecl = dyn_cast(FuncDecl))
+if (MethodDecl->getParent()->isLambda())
+  return;
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148444: [clang-tidy] Prevent `llvmlibc-inline-function-decl` triggering on lambdas

2023-04-15 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: lntue, michaelrj, sivachandra, gchatelet, 
goldstein.w.n.
Herald added subscribers: PiotrZSL, carlosgalvezp, xazax.hun.
Herald added a reviewer: njames93.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added a project: clang-tools-extra.
Herald added a subscriber: cfe-commits.

The `llvmlibc-inline-function-decl` check is intended to be used to
allow declarations in the `libc` project's header to be changed per-TU.
However, it is impossible to place this macro in front of a lambda so
this is not helpful. Additionally, lambdas are always going to have
internal linkage so they will not differ across TUs.

Fixes https://github.com/llvm/llvm-project/issues/62147


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D148444

Files:
  clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp


Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,11 @@
 HeaderFileExtensions))
 return;
 
+  // Consider only functions with a external and visible declaration.
+  if (auto *MemberDecl = dyn_cast(FuncDecl))
+if (MemberDecl->getParent()->isLambda())
+  return;
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);


Index: clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
===
--- clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
+++ clang-tools-extra/clang-tidy/llvmlibc/InlineFunctionDeclCheck.cpp
@@ -39,6 +39,11 @@
 HeaderFileExtensions))
 return;
 
+  // Consider only functions with a external and visible declaration.
+  if (auto *MemberDecl = dyn_cast(FuncDecl))
+if (MemberDecl->getParent()->isLambda())
+  return;
+
   // Check if decl starts with LIBC_INLINE
   auto Loc = FullSourceLoc(Result.SourceManager->getFileLoc(SrcBegin),
*Result.SourceManager);
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147572: [Clang][OpenMP] Fix failure with team-wide allocated variable

2023-04-06 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/test/OpenMP/target_team_variable_codegen.cpp:33
+//.
+// CHECK-NVIDIA: @local_a = internal addrspace(3) global [10 x i32] 
zeroinitializer, align 4
+//.

jdoerfert wrote:
> doru1004 wrote:
> > jhuber6 wrote:
> > > Shouldn't the Nvidia version also be undefined? Not sure why this should 
> > > vary depending on the target.
> > Perhaps NVIDIA code path can tolerate a zeroinitializer? I don't want to 
> > change it if it's not needed. I am basing this check on the code path for 
> > AMD GPUs and the initial bug that was reported.
> for AS 3 we should make it always poison.
We should probably change this in `HeapToShared` in `OpenMPOpt` as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147572/new/

https://reviews.llvm.org/D147572

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147572: [Clang][OpenMP] Fix failure with team-wide allocated variable

2023-04-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/test/OpenMP/target_team_variable_codegen.cpp:33
+//.
+// CHECK-NVIDIA: @local_a = internal addrspace(3) global [10 x i32] 
zeroinitializer, align 4
+//.

Shouldn't the Nvidia version also be undefined? Not sure why this should vary 
depending on the target.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147572/new/

https://reviews.llvm.org/D147572

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147666: [OPENMP] Adds /lib to rpath to avoid need to set LD_LIBRARY_PATH to find plugins.

2023-04-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

Using `-rpath` by default with OpenMP was removed after a long conversation in 
https://reviews.llvm.org/D143306. The way forward is most likely to have AOMP 
provide this in a resource file configuration. I think @ronlieb has a working 
version of that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147666/new/

https://reviews.llvm.org/D147666

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147579: [nvptx-arch] Dynamically load `libcuda.so.1` directly instead

2023-04-04 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGad6a7d7dc9a2: [nvptx-arch] Dynamically load `libcuda.so.1` 
directly instead (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147579/new/

https://reviews.llvm.org/D147579

Files:
  clang/tools/nvptx-arch/NVPTXArch.cpp


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -49,7 +49,7 @@
 CUresult (*cuDeviceGet)(CUdevice *, int);
 CUresult (*cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice);
 
-constexpr const char *DynamicCudaPath = "libcuda.so";
+constexpr const char *DynamicCudaPath = "libcuda.so.1";
 
 llvm::Error loadCUDA() {
   std::string ErrMsg;


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -49,7 +49,7 @@
 CUresult (*cuDeviceGet)(CUdevice *, int);
 CUresult (*cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice);
 
-constexpr const char *DynamicCudaPath = "libcuda.so";
+constexpr const char *DynamicCudaPath = "libcuda.so.1";
 
 llvm::Error loadCUDA() {
   std::string ErrMsg;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147579: [nvptx-arch] Dynamically load `libcuda.so.1` directly instead

2023-04-04 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added a reviewer: tra.
Herald added subscribers: mattd, gchakrabarti, asavonic, yaxunl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jholewinski.
Herald added a project: clang.

This patch loads the CUDA driver library directly via its real
`DT_SONAME`. This prevents the filesystem from needing to reload it in
cases when it's already loaded.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D147579

Files:
  clang/tools/nvptx-arch/NVPTXArch.cpp


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -49,7 +49,7 @@
 CUresult (*cuDeviceGet)(CUdevice *, int);
 CUresult (*cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice);
 
-constexpr const char *DynamicCudaPath = "libcuda.so";
+constexpr const char *DynamicCudaPath = "libcuda.so.1";
 
 llvm::Error loadCUDA() {
   std::string ErrMsg;


Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -49,7 +49,7 @@
 CUresult (*cuDeviceGet)(CUdevice *, int);
 CUresult (*cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice);
 
-constexpr const char *DynamicCudaPath = "libcuda.so";
+constexpr const char *DynamicCudaPath = "libcuda.so.1";
 
 llvm::Error loadCUDA() {
   std::string ErrMsg;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102107: [OpenMP] Codegen aggregate for outlined function captures

2023-04-04 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 510797.
jhuber6 added a comment.
Herald added subscribers: kbarton, nemanjai.

Fixed the Clang tests. Haven't touched the LLVM ones because this breaks 
SPMDzation and state machine rewrites completely in those tests. Someone who 
knows what this patch changes should look into what needs to be updated to make 
those tests match whatever form SPMDzation expects now. Also for some bizarre 
reason this patch breaks adding `alwaysinline` on `kmpc_parallel_51`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102107/new/

https://reviews.llvm.org/D102107

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c
  clang/test/AST/ast-dump-openmp-distribute-parallel-for.c
  clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c
  clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c
  clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c
  clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c
  clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
  clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
  clang/test/OpenMP/bug54082.c
  clang/test/OpenMP/bug60602.cpp
  clang/test/OpenMP/cancel_codegen.cpp
  clang/test/OpenMP/cancellation_point_codegen.cpp
  clang/test/OpenMP/debug-info-complex-byval.cpp
  clang/test/OpenMP/debug-info-openmp-array.cpp
  clang/test/OpenMP/debug_threadprivate_copyin.c
  clang/test/OpenMP/declare_target_codegen_globalization.cpp
  clang/test/OpenMP/declare_target_constexpr_codegen.cpp
  clang/test/OpenMP/declare_variant_construct_codegen_1.c
  clang/test/OpenMP/distribute_codegen.cpp
  clang/test/OpenMP/distribute_firstprivate_codegen.cpp
  clang/test/OpenMP/distribute_lastprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
  clang/test/OpenMP/distribute_private_codegen.cpp
  clang/test/OpenMP/distribute_simd_codegen.cpp
  clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
  clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
  clang/test/OpenMP/distribute_simd_private_codegen.cpp
  clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
  clang/test/OpenMP/for_firstprivate_codegen.cpp
  clang/test/OpenMP/for_lastprivate_codegen.cpp
  clang/test/OpenMP/for_linear_codegen.cpp
  clang/test/OpenMP/for_private_codegen.cpp
  clang/test/OpenMP/for_reduction_codegen.cpp
  clang/test/OpenMP/for_reduction_codegen_UDR.cpp
  clang/test/OpenMP/for_reduction_task_codegen.cpp
  clang/test/OpenMP/irbuilder_safelen.cpp
  clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp
  clang/test/OpenMP/irbuilder_simd_aligned.cpp
  clang/test/OpenMP/irbuilder_simdlen.cpp
  clang/test/OpenMP/irbuilder_simdlen_safelen.cpp
  clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
  clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
  clang/test/OpenMP/metadirective_device_kind_codegen.c
  clang/test/OpenMP/metadirective_device_kind_codegen.cpp
  clang/test/OpenMP/metadirective_implementation_codegen.cpp
  clang/test/OpenMP/nested_loop_codegen.cpp
  clang/test/OpenMP/nvptx_SPMD_codegen.cpp
  clang/test/OpenMP/nvptx_allocate_codegen.cpp
  clang/test/OpenMP/nvptx_data_sharing.cpp
  clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
  clang/test/OpenMP/nvptx_lambda_capturing.cpp
  clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp
  clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
  clang/test/OpenMP/nvptx_target_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_num_threads_codege

[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-04-03 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf263bd8f7d4c: [Clang] Implicitly include LLVM libc headers 
for the GPU (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D146973?vs=509090&id=510560#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/gpu-libc-headers.c


Index: clang/test/Driver/gpu-libc-headers.c
===
--- /dev/null
+++ clang/test/Driver/gpu-libc-headers.c
@@ -0,0 +1,22 @@
+// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
--sysroot=./ \
+// RUN: -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
--sysroot=./ \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda 
-Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib 
--sysroot=./ %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib 
--sysroot=./ %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" 
"{{.*}}include/gpu-none-llvm"{{.*}}"-isysroot" "./"
+
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nogpuinc %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nostdinc %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nobuiltininc %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-HEADERS-DISABLED
+// CHECK-HEADERS-DISABLED-NOT: "-cc1"{{.*}}"-c-isystem" 
"{{.*}}include/gpu-none-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1157,6 +1157,24 @@
   if (JA.isOffloading(Action::OFK_HIP))
 getToolChain().AddHIPIncludeArgs(Args, CmdArgs);
 
+  // If we are compiling for a GPU target we want to override the system 
headers
+  // with ones created by the 'libc' project if present.
+  if (!Args.hasArg(options::OPT_nostdinc) &&
+  !Args.hasArg(options::OPT_nogpuinc) &&
+  !Args.hasArg(options::OPT_nobuiltininc) &&
+  (getToolChain().getTriple().isNVPTX() ||
+   getToolChain().getTriple().isAMDGCN())) {
+
+  // Add include/gpu-none-libc/* to our system include path. This lets us 
use
+  // GPU-specific system headers first. These headers should be made to be
+  // compatible with the host environment's headers.
+  SmallString<128> P(llvm::sys::path::parent_path(D.InstalledDir));
+  llvm::sys::path::append(P, "include");
+  llvm::sys::path::append(P, "gpu-none-llvm");
+  CmdArgs.push_back("-c-isystem");
+  CmdArgs.push_back(Args.MakeArgString(P));
+  }
+
   // If we are offloading to a target via OpenMP we need to include the
   // openmp_wrappers folder which contains alternative system headers.
   if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&


Index: clang/test/Driver/gpu-libc-headers.c
===
--- /dev/null
+++ clang/test/Driver/gpu-libc-headers.c
@@ -0,0 +1,22 @@
+// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
+// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib --sysroot=./ %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib --sysroot=./ %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" "{{.*}}include/gpu-none-llvm"{{.*}}"-isysroot" "./"
+
+// RUN:   %clang -### --target=amdgcn-amd-am

[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-04-03 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:1196-1197
 
+  // If we are compiling for a GPU target we want to override the system 
headers
+  // with ones created by the 'libc' project if present.
+  if (!Args.hasArg(options::OPT_nostdinc) &&

tra wrote:
> Please add a TODO with some details outlining what it's supposed to do, the 
> issues we've discussed, and that this is intended to be a temporary solution 
> (famous last words, I know).
> 
> 
I'm not sure if we should consider this temporary, we simply need to ensure 
that the headers in this directory are compatible with the host environment 
somehow.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-04-03 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

ping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147365: [HIPSPV] Remove useIntegratedAs. NFC

2023-03-31 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

I don't know the toolchain, does `HIPSPV` perform as expected if you pass 
`-fno-integrated-as`? The difference is that `useIntegratedAs` forces it to 
always be enabled so the user can't change it AFAIK.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147365/new/

https://reviews.llvm.org/D147365

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145815: [Flang][Driver] Add support for fopenmp-is-device and fembed-offload-object to Flang ToolChain

2023-03-29 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D145815#4230780 , @jeanPerier 
wrote:

> @agozillon, in the test added here (omp-frontend-forwarding.f90), I am seeing 
> failures in some patches' Windows pre-merge checks that I think are not 
> related to the patches.
> Could you check if there is a stability/reproducibility issue with the 
> omp-frontend-forwarding.f90 on windows?
>
> The failure message looks like:
>
>   # command stderr:
>   
> C:\ws\w8\llvm-project\premerge-checks\flang\test\Driver\omp-frontend-forwarding.f90:21:23:
>  error: CHECK-OPENMP-EMBED: expected string not found in input
>   ! CHECK-OPENMP-EMBED: "{{[^"]*}}clang-offload-packager" {{.*}} 
> "--image=file={{.*}}.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp"
> ^
>   :6:435: note: scanning from here
>"c:\\ws\\w8\\llvm-project\\premerge-checks\\build\\bin\\flang-new" "-fc1" 
> "-triple" "amdgcn-amd-amdhsa" "-emit-llvm-bc" "-fopenmp" "-mrelocation-model" 
> "pic" "-pic-level" "2" "-fopenmp-is-device" "-o" 
> "C:\\Users\\ContainerAdministrator\\AppData\\Local\\Temp\\lit-tmp-1koflt_t\\omp-frontend-forwarding-gfx90a-6808f0.bc"
>  "-x" "f95-cpp-input" 
> "C:\\ws\\w8\\llvm-project\\premerge-checks\\flang\\test\\Driver\\omp-frontend-forwarding.f90"
>   
>   
>   
>   
>   
>   ^
>   :7:266: note: possible intended match here
>"c:\\program files\\llvm\\bin\\clang-offload-packager.exe" "-o" 
> "C:\\Users\\ContainerAdministrator\\AppData\\Local\\Temp\\lit-tmp-1koflt_t\\omp-frontend-forwarding-9beea6.out"
>  
> "--image=file=C:\\Users\\ContainerAdministrator\\AppData\\Local\\Temp\\lit-tmp-1koflt_t\\omp-frontend-forwarding-gfx90a-6808f0.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp"
>
> I wonder if this is an issue with the ".exe" command suffix in the windows 
> output.
>
> Example of pre-merge failures:
>
> https://reviews.llvm.org/D146989
> https://buildkite.com/llvm-project/premerge-checks/builds/143821#01872b65-9b0c-457b-8714-c1f0ca00d02b
>
> or
> https://reviews.llvm.org/D147130
> https://buildkite.com/llvm-project/premerge-checks/builds/143873#01872ccb-9251-499a-b9fd-9155e3ffb1f1

Most likely the classic `.exe` on Windows problem.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145815/new/

https://reviews.llvm.org/D145815

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-28 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 509090.
jhuber6 added a comment.

Changing to use the `gpu-none-llvm` subfolder name that @sivachandra 
recommended. Also adding a `--sysroot` argument to show that this include path 
shows up first.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/gpu-libc-headers.c


Index: clang/test/Driver/gpu-libc-headers.c
===
--- /dev/null
+++ clang/test/Driver/gpu-libc-headers.c
@@ -0,0 +1,22 @@
+// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
--sysroot=./ \
+// RUN: -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
--sysroot=./ \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda 
-Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib 
--sysroot=./ %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib 
--sysroot=./ %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" 
"{{.*}}include/gpu-none-llvm"{{.*}}"-isysroot" "./"
+
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nogpuinc %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nostdinc %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nobuiltininc %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-HEADERS-DISABLED
+// CHECK-HEADERS-DISABLED-NOT: "-cc1"{{.*}}"-c-isystem" 
"{{.*}}include/gpu-none-llvm"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1193,6 +1193,23 @@
   if (JA.isOffloading(Action::OFK_HIP))
 getToolChain().AddHIPIncludeArgs(Args, CmdArgs);
 
+  // If we are compiling for a GPU target we want to override the system 
headers
+  // with ones created by the 'libc' project if present.
+  if (!Args.hasArg(options::OPT_nostdinc) &&
+  !Args.hasArg(options::OPT_nogpuinc) &&
+  !Args.hasArg(options::OPT_nobuiltininc) &&
+  (getToolChain().getTriple().isNVPTX() ||
+   getToolChain().getTriple().isAMDGCN())) {
+
+  // Add include/gpu-none-libc/* to our system include path. This lets us 
use
+  // GPU-specific system headers first.
+  SmallString<128> P(llvm::sys::path::parent_path(D.InstalledDir));
+  llvm::sys::path::append(P, "include");
+  llvm::sys::path::append(P, "gpu-none-llvm");
+  CmdArgs.push_back("-c-isystem");
+  CmdArgs.push_back(Args.MakeArgString(P));
+  }
+
   // If we are offloading to a target via OpenMP we need to include the
   // openmp_wrappers folder which contains alternative system headers.
   if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&


Index: clang/test/Driver/gpu-libc-headers.c
===
--- /dev/null
+++ clang/test/Driver/gpu-libc-headers.c
@@ -0,0 +1,22 @@
+// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
+// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib --sysroot=./ %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib --sysroot=./ %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" "{{.*}}include/gpu-none-llvm"{{.*}}"-isysroot" "./"
+
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nogpuinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nog

[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-28 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D146973#4228070 , @tra wrote:

> I'm OK with injecting the path *now* with an understanding that it's a 
> short-term "happens to work" way to move forward while we're working on a 
> better solution.

So, the proposed path forward is this. We have `libc` generate its own headers 
so we can have a base implementation. We create these headers with the 
intention of them providing a full interface between the host and device. This 
might mean curating some differences based on whatever the host does, or just 
making sure we choose sizes that are compatible. So these headers are the 
expected interface to the `libc` implementations we support, but we ensure that 
things match between the host and device by only providing interfaces we've 
verified somehow.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-28 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D146973#4227433 , @aaron.ballman 
wrote:

> I am not asking you to implement a library based off another implementation's 
> specification. I am relaying implementation experience with the design you've 
> chosen for your implementation and how well it's worked in other, related 
> projects. Given that two different technologies have both run into this same 
> problem, I think the llvm-libc folks should carefully consider the design 
> decisions here. If it turns out this is the best way forward, that's fine.

Sorry, that was more directed at Johannes. This is definitely a hard problem. 
Each approach has certain benefits, but I think keeping the headers synced like 
we do in OpenMP has mainly worked thus far because we don't have any actual 
implementations of most of it. If we want to provide a library I don't think 
there's a reasonable way to implement it as a unified header unless we control 
the system header as well. I'm hoping that `libc` offers a sufficiently small 
surface that we should be able to provide functionality that's expected for 
both. And in some cases it should be fine to share existing headers, but it 
shouldn't be the expected route, is all I'm saying.

> I'm not asking you to copy other libc headers. I'm pointing out that having 
> two separate headers, one for host and one for device, is a recipe for 
> problems in practice because these two will invariably get out of sync in 
> really fascinating ways that are extremely hard for people to debug. But 
> maybe there's a misunderstanding here: I am assuming we consider it to be 
> unsupported to use glibc/musl/etc on the host and llvm-libc on the device, 
> but maybe that's a faulty assumption.

We can't do that for the time being, since LLVM's `libc` is still in 
development. It's not a sufficient replacement for the host `libc` at this 
point. It may be an interesting point to get to in the future, it would make it 
much easier to keep things in sync for sure. It may be easier to stipulate 
something like that with `libc++` when we get to that point since `libc++` is 
more complete as far as I'm aware.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-28 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D146973#4227114 , @aaron.ballman 
wrote:

> Hmmm, I've had experience with SYCL as to how it goes when you have 
> differences between host and device; those kinds of bugs are incredibly hard 
> to track down. Pointer sizes being compatible is a good start, but you need 
> integer widths, floating point formats, structure layout decisions, macro 
> definitions, etc to all be the same as well. Having only one set of headers 
> that can be used helps users avoid these sort of problems.

The problem is that we are trying to implement an actual library here. It is, 
in my opinion, completely unreasonable to try to implement a library based off 
of another implementation's specification. What you are suggesting is that we 
implement a GPU library that copies every internal implementation detail that 
GNU has for that platform. So, let's just copy-paste their headers into our 
LLVM `libc` and make sure we copy all of their implementations too. Now what if 
someone wants to use `musl` instead? Do we copy that one as well and have 
everything surrounded by `ifdef`s? Do we just implement some meta libc that is 
compatible with every other `libc`? This is not going to create a usable 
library, and as the person who would presumably need to write it, I'm not going 
to spend my time copying other `libc` headers.

We need to provide fully-custom headers, if this fully-custom header uses 
`#include_next` after we've verified that it doesn't break, that's fine. I'm 
not particularly concerned if a macro or function is undefined between the CPU 
and GPU. The important point is that any symbol or macro we provide in the 
GPU's headers has an implementation that is expected to be compatible with the 
host. It's understandable if the macros and functions map to something slightly 
different, as long as it does what we say it does.

> So we're comfortable painting ourselves into a corner where llvm-libc is only 
> usable with Clang, depending on the target?

There might be somewhat of a misunderstanding here, I'm talking about the GPU 
implementation of `libc` using LLVM's `libc`. Expecting a specific toolchain is 
standard procedure for every single other offloading language. It's how we 
build ROCm device libraries, CUDA device libraries, the OpenMP device runtime, 
etc. LLVM's `libc` project is perfectly fine being compiled with `gcc`, but the 
GPU is such a special case we don't have that luxury and need to use `clang`. 
This is the same approach we do for OpenMP already.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D146973#4225983 , @jdoerfert wrote:

> I said this before, many times:
>
> We don't want to have different host and device libraries that are 
> incompatible.
> Effectively, what we really want, is the host environment to just work on the 
> GPU.
> That includes extensions in the host headers, macros, taking the address of 
> stuff, etc.
> This became clear when we made (c)math.h available on the GPU (for OpenMP).

The problem is that we cannot control the system headers, they are not expected 
to work with `llvm-libc`. For example: the GNU `ctype.h` includes `features.h` 
which will attempt to include the 32-bit stubs file because the GPU is not a 
recognized target on the host. If you work around that, like we do in OpenMP, 
then you will realize that `isalnum` is actually a macro to `__isctype` which 
references an external table called `__ctype_b_loc` which isn't defined in the 
C standard. So, now we have a header that causes `isalnum` to no longer call 
the implementation in LLVM's `libc`, it also fails at link time because there 
is no reference to `__ctype_b_loc` in LLVM's `libc`. What is the solution here? 
Do we implement `libc` in LLVM with a workaround for every internal 
implementation in the GNU `libc`?

> For most of libc, we might get away with custom GPU headers but eventually it 
> will break "expected to work" user code, at the latest when we arrive at 
> libc++.
> A user can, right now, map a std::vector from the host to the device, and, 
> assuming they properly did the deep copy, it will work.
> If we end up with different sizes, alignments, layouts, this will not only 
> break, but we will also break any structure that depends on those sizes, 
> e.g., mapping an object with a std::map inside even if it is not accessed 
> will cause problems.
>
> In addition, systems are never "vanilla". We want to include the system 
> headers to get the extensions users might rely on. Providing only alternative 
> headers even breaks working code (in the OpenMP case), e.g., when we 
> auto-translate definitions in the header to the device (not a CUDA thing, I 
> think).

Using custom generated headers is the only approach that is guaranteed to 
actually work when we compile this. We cannot sanely implement a library using 
headers unique to another implementation targeting an entirely different 
machine, we will endlessly be chasing implementation details like above. This 
works in OpenMP currently because we've chosen a handful of headers that this 
doesn't completely break for.

> I strongly suggest to include our GPU headers first, in them we setup the 
> overlays for the system headers, and then we include the system versions.
> This works for (c)math.h, complex, and other parts of libc and libc++ 
> already, even though we don't ship them as libraries.

The wrapper approach works fine for the ones we've selected. And in the GPU 
`libc` we could generate our own headers that have `#include_next` in them if 
we verify that it works for that header. I think in general though, we need to 
work with custom headers first, and implement a set of features we know to work.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146975: [NVPTX] Add __CUDA_ARCH__ macro to standalone NVPTX compilations

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
jhuber6 marked an inline comment as done.
Closed by commit rGbed7005eb4d4: [NVPTX] Add __CUDA_ARCH__ macro to standalone 
NVPTX compilations (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D146975?vs=508672&id=508828#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146975/new/

https://reviews.llvm.org/D146975

Files:
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/test/Frontend/standalone-nvptx-macros.c


Index: clang/test/Frontend/standalone-nvptx-macros.c
===
--- /dev/null
+++ clang/test/Frontend/standalone-nvptx-macros.c
@@ -0,0 +1,5 @@
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang %s -c -E -dM --target=nvptx64-nvidia-cuda -march=sm_70 -o - | \
+// RUN:   FileCheck --check-prefix=CHECK-CUDA-ARCH %s
+// CHECK-CUDA-ARCH: #define __CUDA_ARCH__ 700
Index: clang/lib/Basic/Targets/NVPTX.cpp
===
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -168,7 +168,7 @@
MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
-  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
+  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
 std::string CUDAArchCode = [this] {
   switch (GPU) {


Index: clang/test/Frontend/standalone-nvptx-macros.c
===
--- /dev/null
+++ clang/test/Frontend/standalone-nvptx-macros.c
@@ -0,0 +1,5 @@
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang %s -c -E -dM --target=nvptx64-nvidia-cuda -march=sm_70 -o - | \
+// RUN:   FileCheck --check-prefix=CHECK-CUDA-ARCH %s
+// CHECK-CUDA-ARCH: #define __CUDA_ARCH__ 700
Index: clang/lib/Basic/Targets/NVPTX.cpp
===
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -168,7 +168,7 @@
MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
-  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
+  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
 std::string CUDAArchCode = [this] {
   switch (GPU) {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D146973#4225641, @aaron.ballman wrote:

>> This lets offloading languages such as OpenMP use the system string.h when 
>> compiling for the host and then the LLVM libc string.h when targeting the 
>> GPU.
>
> How do we avoid ABI issues when the two headers get sufficiently out of sync? 
> (In general, I'm pretty surprised to hear we would want different headers for 
> the GPU and the system -- does this affect conformance requirements from the 
> C standard?)

I'm not entirely sure if there's a good method. I think no matter what we do 
we'll need to implement some kind of 'glue'. I think most should be fine if we 
go by the C-standard. We expect pointer sizes and everything to be compatible 
at least.

> Excuse my ignorance on this point, but is llvm-libc intended to work with any 
> compiler other than Clang? (e.g., will other compilers need to do this dance 
> as well?)

Right now the GPU target I'm working on can only be built with `clang` and it 
will be that way for the foreseeable future.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:1230
+  llvm::sys::path::append(P, "llvm-libc");
+  CmdArgs.push_back("-c-isystem");
+  CmdArgs.push_back(Args.MakeArgString(P));

sivachandra wrote:
> tra wrote:
> > sivachandra wrote:
> > > jhuber6 wrote:
> > > > tra wrote:
> > > > > jhuber6 wrote:
> > > > > > tra wrote:
> > > > > > > Ensuring the right include order will be tricky. Interaction 
> > > > > > > between the headers provided by llvm-libc vs the system headers 
> > > > > > > will be interesting if we end up mixing the headers. It may be 
> > > > > > > less problematic compared to the C++ standard library, but I 
> > > > > > > doubt that mixing two implementations would work well here, 
> > > > > > > either. 
> > > > > > > 
> > > > > > > So, the major question I have -- does adding include path here 
> > > > > > > guarantees that we're not picking up the host headers? I do not 
> > > > > > > see any changes that would exclude system headers from include 
> > > > > > > paths.
> > > > > > > If we do have include paths leading to both llvm-libc and the 
> > > > > > > host headers, what's expected to happen if user code ends up 
> > > > > > > including a header that's not present in  llvm-libc? Is that 
> > > > > > > possible?
> > > > > > > 
> > > > > > Right now I'm just kind of relying on an expectation that since 
> > > > > > this will be the first `c-isystem` path set, then it will pull in 
> > > > > > these libraries first if they exist. It's not captured by the 
> > > > > > tests, but compiling with `-v` shows this path being used first in 
> > > > > > my experience. So, theoretically, if there is an implementation of 
> > > > > > said header in this location, it will be picked up before anything 
> > > > > > else. Otherwise it'll just search the other standard locations.
> > > > > I think this will be a problem. We're cross-compiling here and for 
> > > > > that to work reliably we need to make sure that only target headers 
> > > > > are in effect. The problem is that we likely just do not have 
> > > > > sufficiently complete set of headers for the GPU. Do we? I have no 
> > > > > idea what exactly llvm-libc provides and whether it is sufficient for 
> > > > > normal user code to cross-compile for a GPU. 
> > > > > 
> > > > > It would be interesting to try to compile some C++ code which would 
> > > > > include commonly used, but generally target-agnostic, headers like 
> > > > >   , etc and check whether we end up 
> > > > > pulling in any system headers. Then check what happens if we do not 
> > > > > have system headers available at all.
> > > > No, it's definitely not complete. Some headers work on the GPU, most 
> > > > break in some way or another. The only ones `llvm-libc` provides 
> > > > currently is `string.h` and `ctype.h`. But, I figured this wouldn't be 
> > > > a problem since it would just go to the system headers anyway if we 
> > > > didn't provide them. So we are merely replacing maybe broken with 
> > > > probably works.
> > > > 
> > > > I was talking with Johannes and he brings up other issues about the 
> > > > idea of host-device compatibility between these headers. Since, 
> > > > fundamentally, right now `libc` generates its own headers and needs to 
> > > > generate its own headers to function. But there can be a problem when 
> > > > migrating data between the host and the device is the headers on the 
> > > > host differ somewhat to those on the device. I'm not sure what a good 
> > > > overall solution to that problem is.
> > > Normally, one should not expect target and host headers to be compatible. 
> > > So, if you are building for the host, you should use host headers and if 
> > > you are building for the target, you should use target headers. Does 
> > > general GPU build not follow this approach? May be there are some common 
> > > headers but I do not expect them to be from the standard libraries.
> > We can generally assume that the GPU and the host do have largely identical 
> > types. At least the subset of the types we expect to exchange between host 
> > and GPU.
> > CUDA compilation cheats, and allows the host to provide most of the 
> > headers, with clang and CUDA SDK providing a subset of GPU-side overloads. 
> > This way, if GPU-side functions do implicitly rely on the code nominally 
> > provided for the host by the host headers, but if we need to code-gen it, 
> > we can only do so for a subset that's actually compileable for the GPU -- 
> > either constexpr functions, lambdas or `__device__` overloads provided by 
> > us.
> > 
> > Standalone compilation does not have the benefit of having the cake and 
> > being able to eat it. It has to be all or nothing, as we do not have the 
> > ability to separate the host and GPU code we pull in via headers, nor can 
> > we provide a GPU-side overloads. In a way injecting llvm-libc path is a 
> > crude attempt to do that by 

[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D146973#4225300, @tschuett wrote:

> Could you hide the amdgpu and nvptx somewhere libc here `clang 
> -print-resource-dir` in two different directories?  One for AMD, one for 
> NVPTX.

So, right now this header is installed from the `libc` projects. So is there a 
good way to communicate the resource directory as an install target? Also I 
think just calling it `gpu` would be sufficient, I'd like these to be common 
between the GPUs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:1230
+  llvm::sys::path::append(P, "llvm-libc");
+  CmdArgs.push_back("-c-isystem");
+  CmdArgs.push_back(Args.MakeArgString(P));

tra wrote:
> jhuber6 wrote:
> > tra wrote:
> > > Ensuring the right include order will be tricky. Interaction between the 
> > > headers provided by llvm-libc vs the system headers will be interesting 
> > > if we end up mixing the headers. It may be less problematic compared to 
> > > the C++ standard library, but I doubt that mixing two implementations 
> > > would work well here, either. 
> > > 
> > > So, the major question I have -- does adding include path here guarantees 
> > > that we're not picking up the host headers? I do not see any changes that 
> > > would exclude system headers from include paths.
> > > If we do have include paths leading to both llvm-libc and the host 
> > > headers, what's expected to happen if user code ends up including a 
> > > header that's not present in  llvm-libc? Is that possible?
> > > 
> > Right now I'm just kind of relying on an expectation that since this will 
> > be the first `c-isystem` path set, then it will pull in these libraries 
> > first if they exist. It's not captured by the tests, but compiling with 
> > `-v` shows this path being used first in my experience. So, theoretically, 
> > if there is an implementation of said header in this location, it will be 
> > picked up before anything else. Otherwise it'll just search the other 
> > standard locations.
> I think this will be a problem. We're cross-compiling here and for that to 
> work reliably we need to make sure that only target headers are in effect. 
> The problem is that we likely just do not have sufficiently complete set of 
> headers for the GPU. Do we? I have no idea what exactly llvm-libc provides 
> and whether it is sufficient for normal user code to cross-compile for a GPU. 
> 
> It would be interesting to try to compile some C++ code which would include 
> commonly used, but generally target-agnostic, headers like   
> , etc and check whether we end up pulling in any system headers. 
> Then check what happens if we do not have system headers available at all.
No, it's definitely not complete. Some headers work on the GPU, most break in 
some way or another. The only ones `llvm-libc` provides currently are `string.h` 
and `ctype.h`. But, I figured this wouldn't be a problem since it would just go 
to the system headers anyway if we didn't provide them. So we are merely 
replacing maybe broken with probably works.

I was talking with Johannes and he brings up other issues about the idea of 
host-device compatibility between these headers. Since, fundamentally, right 
now `libc` generates its own headers and needs to generate its own headers to 
function. But there can be a problem when migrating data between the host and 
the device if the headers on the host differ somewhat from those on the device. 
I'm not sure what a good overall solution to that problem is.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:1230
+  llvm::sys::path::append(P, "llvm-libc");
+  CmdArgs.push_back("-c-isystem");
+  CmdArgs.push_back(Args.MakeArgString(P));

tra wrote:
> Ensuring the right include order will be tricky. Interaction between the 
> headers provided by llvm-libc vs the system headers will be interesting if we 
> end up mixing the headers. It may be less problematic compared to the C++ 
> standard library, but I doubt that mixing two implementations would work well 
> here, either. 
> 
> So, the major question I have -- does adding include path here guarantees 
> that we're not picking up the host headers? I do not see any changes that 
> would exclude system headers from include paths.
> If we do have include paths leading to both llvm-libc and the host headers, 
> what's expected to happen if user code ends up including a header that's not 
> present in  llvm-libc? Is that possible?
> 
Right now I'm just kind of relying on an expectation that since this will be 
the first `c-isystem` path set, then it will pull in these libraries first if 
they exist. It's not captured by the tests, but compiling with `-v` shows this 
path being used first in my experience. So, theoretically, if there is an 
implementation of said header in this location, it will be picked up before 
anything else. Otherwise it'll just search the other standard locations.



Comment at: clang/test/Driver/gpu-libc-headers.c:14
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" "{{.*}}include/llvm-libc"
+

tra wrote:
> I think here we want to test for not just the presence of 
> `include/llvm-libc`, but also that it's in the correct position relative to 
> other include paths. 
> 
> We may want something similar to what we have in 
> clang/test/Driver/hip-include-path.hip
Yeah, I wasn't sure if there was a good way to guarantee a certain path since 
those can change based on the system. Maybe `--sysroot`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146975: [NVPTX] Add __CUDA_ARCH__ macro to standalone NVPTX compilations

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 marked an inline comment as done.
jhuber6 added inline comments.



Comment at: clang/lib/Basic/Targets/NVPTX.cpp:171
   Builder.defineMacro("__NVPTX__");
-  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
+  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.

tra wrote:
> Wouldn't just `if(!HostTarget)` be sufficient here?
> 
`HostTarget` is the host toolchain, provided via `-aux-triple`. So it's set for 
OpenMP and CUDA but not for a standalone.



Comment at: clang/test/Frontend/nvptx-macros.c:1
+// REQUIRES: nvptx-registered-target
+

tra wrote:
> I'd rename the file to make it more obvious that it deals with a standalone 
> compilation.
> 
> `standalone-nvptx-macros.c` ?
I can do that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146975/new/

https://reviews.llvm.org/D146975

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146326: [LinkerWrapper] Do not extract globals with no offloading language

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGb530e1af62be: [LinkerWrapper] Do not extract globals with no 
offloading language (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146326/new/

https://reviews.llvm.org/D146326

Files:
  clang/test/Driver/linker-wrapper-libs.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1162,7 +1162,8 @@
 
 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
 /// extract any symbols from it.
-Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, StringSaver &Saver,
+Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
+ StringSaver &Saver,
  DenseMap &Syms) {
   Expected IRSymtabOrErr = readIRSymtab(Buffer);
   if (!IRSymtabOrErr)
@@ -1182,9 +1183,10 @@
   ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
!Sym.isUndefined());
   // We will extract if it defines a new global symbol visible to the host.
+  // This is only necessary for code targeting an offloading language.
   bool NewGlobalSymbol =
   ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
-   !Sym.canBeOmittedFromSymbolTable() &&
+   !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
(Sym.getVisibility() != GlobalValue::HiddenVisibility));
   ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
 
@@ -1203,7 +1205,8 @@
 
 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
 /// any symbols from it.
-Expected getSymbolsFromObject(const ObjectFile &Obj, StringSaver &Saver,
+Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
+StringSaver &Saver,
 DenseMap &Syms) {
   bool ShouldExtract = false;
   for (SymbolRef Sym : Obj.symbols()) {
@@ -1228,9 +1231,11 @@
!(*FlagsOrErr & SymbolRef::SF_Undefined);
 
 // We will extract if it defines a new global symbol visible to the host.
-bool NewGlobalSymbol = ((NewSymbol || (OldSym & Sym_Undefined)) &&
-!(*FlagsOrErr & SymbolRef::SF_Undefined) &&
-!(*FlagsOrErr & SymbolRef::SF_Hidden));
+// This is only necessary for code targeting an offloading language.
+bool NewGlobalSymbol =
+((NewSymbol || (OldSym & Sym_Undefined)) &&
+ !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
+ !(*FlagsOrErr & SymbolRef::SF_Hidden));
 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
 
 // Update this symbol in the "table" with the new information.
@@ -1250,18 +1255,18 @@
 ///   1) It defines an undefined symbol in a regular object filie.
 ///   2) It defines a global symbol without hidden visibility that has not
 ///  yet been defined.
-Expected getSymbols(StringRef Image, StringSaver &Saver,
+Expected getSymbols(StringRef Image, OffloadKind Kind, StringSaver &Saver,
   DenseMap &Syms) {
   MemoryBufferRef Buffer = MemoryBufferRef(Image, "");
   switch (identify_magic(Image)) {
   case file_magic::bitcode:
-return getSymbolsFromBitcode(Buffer, Saver, Syms);
+return getSymbolsFromBitcode(Buffer, Kind, Saver, Syms);
   case file_magic::elf_relocatable: {
 Expected> ObjFile =
 ObjectFile::createObjectFile(Buffer);
 if (!ObjFile)
   return ObjFile.takeError();
-return getSymbolsFromObject(**ObjFile, Saver, Syms);
+return getSymbolsFromObject(**ObjFile, Kind, Saver, Syms);
   }
   default:
 return false;
@@ -1336,8 +1341,9 @@
 if (IsArchive && !WholeArchive && !Syms.count(Binary))
   continue;
 
-Expected ExtractOrErr =
-getSymbols(Binary.getBinary()->getImage(), Saver, Syms[Binary]);
+Expected ExtractOrErr = getSymbols(
+Binary.getBinary()->getImage(),
+Binary.getBinary()->getOffloadKind(), Saver, Syms[Binary]);
 if (!ExtractOrErr)
   return ExtractOrErr.takeError();
 
Index: clang/test/Driver/linker-wrapper-libs.c
===
--- clang/test/Driver/linker-wrapper-libs.c
+++ clang/test/Driver/linker-wrapper-libs.c
@@ -64,6 +64,29 @@
 // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
 // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o
 
+//
+// Check th

[PATCH] D146975: [NVPTX] Add __CUDA_ARCH__ macro to standalone NVPTX compilations

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: tra, tianshilei1992, ye-luo, jdoerfert.
Herald added subscribers: mattd, gchakrabarti, asavonic, yaxunl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1, jholewinski.
Herald added a project: clang.

We can now target the NVPTX architecture directly via
`--target=nvptx64-nvidia-cuda`. This currently does not define the
`__CUDA_ARCH__` macro which is used to allow code to target different
codes based on support. This patch simply adds this support.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D146975

Files:
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/test/Frontend/nvptx-macros.c


Index: clang/test/Frontend/nvptx-macros.c
===
--- /dev/null
+++ clang/test/Frontend/nvptx-macros.c
@@ -0,0 +1,5 @@
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang %s -c -E -dM --target=nvptx64-nvidia-cuda -march=sm_70 -o - | \
+// RUN:   FileCheck --check-prefix=CHECK-CUDA-ARCH %s
+// CHECK-CUDA-ARCH: #define __CUDA_ARCH__ 700
Index: clang/lib/Basic/Targets/NVPTX.cpp
===
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -168,7 +168,7 @@
MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
-  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
+  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
 std::string CUDAArchCode = [this] {
   switch (GPU) {


Index: clang/test/Frontend/nvptx-macros.c
===
--- /dev/null
+++ clang/test/Frontend/nvptx-macros.c
@@ -0,0 +1,5 @@
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang %s -c -E -dM --target=nvptx64-nvidia-cuda -march=sm_70 -o - | \
+// RUN:   FileCheck --check-prefix=CHECK-CUDA-ARCH %s
+// CHECK-CUDA-ARCH: #define __CUDA_ARCH__ 700
Index: clang/lib/Basic/Targets/NVPTX.cpp
===
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -168,7 +168,7 @@
MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
-  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
+  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
 std::string CUDAArchCode = [this] {
   switch (GPU) {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

I'm not sure if there's a better way to provide these headers. Like if we let 
the `libc` project output to the Clang resource directory or some other neatly 
nested directory. Right now this just picks up `bin/clang/../include/llvm-libc`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146973/new/

https://reviews.llvm.org/D146973

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146973: [Clang] Implicitly include LLVM libc headers for the GPU

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: tra, yaxunl, JonChesterfield, sivachandra, MaskRay, 
jdoerfert, tianshilei1992.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1.
Herald added a project: clang.

There is currently work to support basic `libc` functionality on the
GPU. Some basic information about the projects can be found at
https://libc.llvm.org/gpu_mode.html. Typically, including the system
headers on the GPU will result in an error. For this reason the LLVM
`libc` project will generate its own headers that can be used with the
GPU.

The problem is that these headers will use the same name as the system headers.
For that reason, D146970 places them in the 
`llvm-libc` subfolder. In order to
still pick these files up, this patch adds changes in clang to default to
searching this directory when targeting the GPU. This lets offloading languages
such as OpenMP use the system `string.h` when compiling for the host and then
the LLVM libc `string.h` when targeting the GPU.

Depends on D146970 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D146973

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/gpu-libc-headers.c


Index: clang/test/Driver/gpu-libc-headers.c
===
--- /dev/null
+++ clang/test/Driver/gpu-libc-headers.c
@@ -0,0 +1,22 @@
+// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_70 -nogpulib %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-HEADERS
+// CHECK-HEADERS: "-cc1"{{.*}}"-c-isystem" "{{.*}}include/llvm-libc"
+
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nogpuinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nostdinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN: -nobuiltininc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
+// CHECK-HEADERS-DISABLED-NOT: "-cc1"{{.*}}"-c-isystem" "{{.*}}include/llvm-libc"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1214,6 +1214,23 @@
 CmdArgs.push_back("__clang_openmp_device_functions.h");
   }
 
+  // If we are compiling for a GPU target we want to override the system headers
+  // with ones created by the 'libc' project if present.
+  if (!Args.hasArg(options::OPT_nostdinc) &&
+  !Args.hasArg(options::OPT_nogpuinc) &&
+  !Args.hasArg(options::OPT_nobuiltininc) &&
+  (getToolChain().getTriple().isNVPTX() ||
+   getToolChain().getTriple().isAMDGCN())) {
+
+  // Add include/llvm-libc/* to our system include path. This lets us use
+  // GPU-specific system headers first.
+  SmallString<128> P(llvm::sys::path::parent_path(D.InstalledDir));
+  llvm::sys::path::append(P, "include");
+  llvm::sys::path::append(P, "llvm-libc");
+  CmdArgs.push_back("-c-isystem");
+  CmdArgs.push_back(Args.MakeArgString(P));
+  }
+
   // Add -i* options, and automatically translate to
   // -include-pch/-include-pth for transparent PCH support. It's
   // wonky, but we include looking for .gch so we can support seamless


Index: clang/test/Driver/gpu-libc-headers.c
===
--- /dev/null
+++ clang/test/Driver/gpu-libc-headers.c
@@ -0,0 +1,22 @@
+// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70

[PATCH] D146326: [LinkerWrapper] Do not extract globals with no offloading language

2023-03-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

ping


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146326/new/

https://reviews.llvm.org/D146326

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2023-03-24 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG29a5c3c8fe30: [NVPTX] Introduce attribute to mark kernels 
without a language mode (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D140226?vs=508170&id=508197#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

Files:
  clang/include/clang/Basic/Attr.td
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGen/nvptx_attributes.c
  clang/test/Misc/pragma-attribute-supported-attributes-list.test


Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -94,6 +94,7 @@
 // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method)
 // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum)
+// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function)
 // CHECK-NEXT: Naked (SubjectMatchRule_function)
 // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function)
 // CHECK-NEXT: NoCommon (SubjectMatchRule_variable)
Index: clang/test/CodeGen/nvptx_attributes.c
===
--- /dev/null
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:[[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:store i32 1, ptr [[TMP0]], align 4
+// CHECK-NEXT:ret void
+__attribute__((nvptx_kernel)) void foo(int *ret) {
+  *ret = 1;
+}
+
+// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4970,7 +4970,10 @@
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
 S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
 
-  D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else
+D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
   // In host compilation the kernel is emitted as a stub function, which is
   // a helper function for launching the kernel. The instructions in the helper
   // function has nothing to do with the source code of the kernel. Do not emit
@@ -8851,6 +8854,7 @@
   case ParsedAttr::AT_CalledOnce:
 handleCalledOnceAttr(S, D, AL);
 break;
+  case ParsedAttr::AT_NVPTXKernel:
   case ParsedAttr::AT_CUDAGlobal:
 handleGlobalAttr(S, D, AL);
 break;
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7373,6 +7373,11 @@
   }
 }
   }
+
+  // Attach kernel metadata directly if compiling for NVPTX.
+  if (FD->hasAttr()) {
+addNVVMMetadata(F, "kernel", 1);
+  }
 }
 
 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
Index: clang/include/clang/Basic/Attr.td
===
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -414,6 +414,7 @@
 def TargetX86 : TargetArch<["x86"]>;
 def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
 def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
+def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
 def TargetWindows : TargetSpec {
   let OSes = ["Win32"];
 }
@@ -1221,6 +1222,12 @@
 }
 def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
 
+def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings = [Clang<"nvptx_kernel">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [Undocumented];
+}
+
 def HIPManaged : InheritableAttr {
   let Spellings = [GNU<"managed">, Declspec<"__managed__">];
   let Subjects = SubjectList<[Var]>;


Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported

[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2023-03-24 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 508170.
jhuber6 added a comment.

Updating to simply add an entirely new attribute again. The existing
`CUDAGlobal` attribute does what we want, but it's also highly coupled with the
CUDA language. This made it pretty much impossible to find a way to re-use it
without breaking existing functionality. The amount of code duplicated is
minimal and this is required to be able to emit a callable kernel targeting
NVPTX directly. I'd like to use this for my ongoing GPU `libc` project so I'd
appreciate someone looking at this again.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

Files:
  clang/include/clang/Basic/Attr.td
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGen/nvptx_attributes.c
  clang/test/Misc/pragma-attribute-supported-attributes-list.test

Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -94,6 +94,7 @@
 // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method)
 // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum)
+// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function)
 // CHECK-NEXT: Naked (SubjectMatchRule_function)
 // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function)
 // CHECK-NEXT: NoCommon (SubjectMatchRule_variable)
Index: clang/test/CodeGen/nvptx_attributes.c
===
--- /dev/null
+++ clang/test/CodeGen/nvptx_attributes.c
@@ -0,0 +1,23 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:[[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:store i32 1, ptr [[TMP0]], align 4
+// CHECK-NEXT:ret void
+//
+__attribute__((nvptx_kernel)) void foo(int *ret) {
+  *ret = 1;
+}
+//.
+// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
+//.
+// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
+// CHECK: !1 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !2 = !{!"clang version 17.0.0"}
+//.
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4970,7 +4970,10 @@
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
 S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
 
-  D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else
+D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
   // In host compilation the kernel is emitted as a stub function, which is
   // a helper function for launching the kernel. The instructions in the helper
   // function has nothing to do with the source code of the kernel. Do not emit
@@ -8851,6 +8854,7 @@
   case ParsedAttr::AT_CalledOnce:
 handleCalledOnceAttr(S, D, AL);
 break;
+  case ParsedAttr::AT_NVPTXKernel:
   case ParsedAttr::AT_CUDAGlobal:
 handleGlobalAttr(S, D, AL);
 break;
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7373,6 +7373,11 @@
   }
 }
   }
+
+  // Attach kernel metadata directly if compiling for NVPTX.
+  if (FD->hasAttr<NVPTXKernelAttr>()) {
+addNVVMMetadata(F, "kernel", 1);
+  }
 }
 
 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
Index: clang/include/clang/Basic/Attr.td
===
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -414,6 +414,7 @@
 def TargetX86 : TargetArch<["x86"]>;
 def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
 def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
+def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
 def TargetWindows : TargetSpec {
   let OSes = ["Win32"];
 }
@@ -1221,6 +1222,12 @@
 }
 def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
 
+def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings

[PATCH] D146750: [NVPTX] Set the atomic inling threshold when targeting NVPTX directly

2023-03-23 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGaf54d1e85285: [NVPTX] Set the atomic inling threshold when 
targeting NVPTX directly (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146750/new/

https://reviews.llvm.org/D146750

Files:
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/test/CodeGen/atomics-inlining.c


Index: clang/test/CodeGen/atomics-inlining.c
===
--- clang/test/CodeGen/atomics-inlining.c
+++ clang/test/CodeGen/atomics-inlining.c
@@ -8,6 +8,7 @@
 // RUN: %clang_cc1 -triple mipsisa64r6el-linux-gnuabi64 -emit-llvm %s -o - | 
FileCheck %s -check-prefix=MIPS64
 // RUN: %clang_cc1 -triple sparc-unknown-eabi -emit-llvm %s -o - | FileCheck 
%s -check-prefix=SPARCV8 -check-prefix=SPARC
 // RUN: %clang_cc1 -triple sparcv9-unknown-eabi -emit-llvm %s -o - | FileCheck 
%s -check-prefix=SPARCV9 -check-prefix=SPARC
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck 
%s -check-prefix=NVPTX
 
 unsigned char c1, c2;
 unsigned short s1, s2;
@@ -109,4 +110,17 @@
 // SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr 
noundef @a2
 // SPARCV8: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr 
noundef @a2
+
+// NVPTX-LABEL: define{{.*}} void @test1
+// NVPTX: = load atomic i8, ptr @c1 seq_cst, align 1
+// NVPTX: store atomic i8 {{.*}}, ptr @c1 seq_cst, align 1
+// NVPTX: = load atomic i16, ptr @s1 seq_cst, align 2
+// NVPTX: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
+// NVPTX: = load atomic i32, ptr @i1 seq_cst, align 4
+// NVPTX: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
+// NVPTX: = load atomic i64, ptr @ll1 seq_cst, align 8
+// NVPTX: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
+// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr 
noundef @a2, i32 noundef 5)
+// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr 
noundef @a2, i32 noundef 5)
+
 }
Index: clang/lib/Basic/Targets/NVPTX.cpp
===
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -93,6 +93,8 @@
 default:
   llvm_unreachable("TargetPointerWidth must be 32 or 64");
 }
+
+MaxAtomicInlineWidth = TargetPointerWidth;
 return;
   }
 


Index: clang/test/CodeGen/atomics-inlining.c
===
--- clang/test/CodeGen/atomics-inlining.c
+++ clang/test/CodeGen/atomics-inlining.c
@@ -8,6 +8,7 @@
 // RUN: %clang_cc1 -triple mipsisa64r6el-linux-gnuabi64 -emit-llvm %s -o - | FileCheck %s -check-prefix=MIPS64
 // RUN: %clang_cc1 -triple sparc-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV8 -check-prefix=SPARC
 // RUN: %clang_cc1 -triple sparcv9-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV9 -check-prefix=SPARC
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s -check-prefix=NVPTX
 
 unsigned char c1, c2;
 unsigned short s1, s2;
@@ -109,4 +110,17 @@
 // SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // SPARCV8: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
+
+// NVPTX-LABEL: define{{.*}} void @test1
+// NVPTX: = load atomic i8, ptr @c1 seq_cst, align 1
+// NVPTX: store atomic i8 {{.*}}, ptr @c1 seq_cst, align 1
+// NVPTX: = load atomic i16, ptr @s1 seq_cst, align 2
+// NVPTX: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
+// NVPTX: = load atomic i32, ptr @i1 seq_cst, align 4
+// NVPTX: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
+// NVPTX: = load atomic i64, ptr @ll1 seq_cst, align 8
+// NVPTX: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
+// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+
 }
Index: clang/lib/Basic/Targets/NVPTX.cpp
===
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -93,6 +93,8 @@
 default:
   llvm_unreachable("TargetPointerWidth must be 32 or 64");
 }
+
+MaxAtomicInlineWidth = TargetPointerWidth;
 return;
   }
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140226: [NVPTX] Introduce attribute to mark kernels without a language mode

2023-03-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

@tra would it be possible to go back to the earlier version that simply duplicated a 
slight amount of logic to introduce the new and separate attribute 
`nvptx_kernel`? Overloading CUDA's `device` attribute is problematic because 
it's used and checked in several different contexts. I'd like to be able to 
simplify this code 
https://github.com/llvm/llvm-project/blob/main/libc/startup/gpu/nvptx/start.cpp.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140226/new/

https://reviews.llvm.org/D140226

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146750: [NVPTX] Set the atomic inling threshold when targeting NVPTX directly

2023-03-23 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: tra, arsenm, tianshilei1992, JonChesterfield.
Herald added subscribers: mattd, gchakrabarti, asavonic.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, wdng, jholewinski.
Herald added a project: clang.

Since Clang 16.0.0 users can target the `NVPTX` architecture directly
via `--target=nvptx64-nvidia-cuda`. However, this does not set the
atomic inlining size correctly. This leads to spurious warnings and
emission of runtime atomics that are never implemented. This patch
ensures that we set this to the appropriate pointer width. This will
always be 64 in the future, as only `nvptx64` will be supported moving
forward.

Fixes: https://github.com/llvm/llvm-project/issues/61410


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D146750

Files:
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/test/CodeGen/atomics-inlining.c


Index: clang/test/CodeGen/atomics-inlining.c
===
--- clang/test/CodeGen/atomics-inlining.c
+++ clang/test/CodeGen/atomics-inlining.c
@@ -8,6 +8,7 @@
 // RUN: %clang_cc1 -triple mipsisa64r6el-linux-gnuabi64 -emit-llvm %s -o - | 
FileCheck %s -check-prefix=MIPS64
 // RUN: %clang_cc1 -triple sparc-unknown-eabi -emit-llvm %s -o - | FileCheck 
%s -check-prefix=SPARCV8 -check-prefix=SPARC
 // RUN: %clang_cc1 -triple sparcv9-unknown-eabi -emit-llvm %s -o - | FileCheck 
%s -check-prefix=SPARCV9 -check-prefix=SPARC
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck 
%s -check-prefix=NVPTX
 
 unsigned char c1, c2;
 unsigned short s1, s2;
@@ -109,4 +110,17 @@
 // SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr 
noundef @a2
 // SPARCV8: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr 
noundef @a2
+
+// NVPTX-LABEL: define{{.*}} void @test1
+// NVPTX: = load atomic i8, ptr @c1 seq_cst, align 1
+// NVPTX: store atomic i8 {{.*}}, ptr @c1 seq_cst, align 1
+// NVPTX: = load atomic i16, ptr @s1 seq_cst, align 2
+// NVPTX: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
+// NVPTX: = load atomic i32, ptr @i1 seq_cst, align 4
+// NVPTX: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
+// NVPTX: = load atomic i64, ptr @ll1 seq_cst, align 8
+// NVPTX: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
+// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr 
noundef @a2, i32 noundef 5)
+// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr 
noundef @a2, i32 noundef 5)
+
 }
Index: clang/lib/Basic/Targets/NVPTX.cpp
===
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -93,6 +93,8 @@
 default:
   llvm_unreachable("TargetPointerWidth must be 32 or 64");
 }
+
+MaxAtomicInlineWidth = TargetPointerWidth;
 return;
   }
 


Index: clang/test/CodeGen/atomics-inlining.c
===
--- clang/test/CodeGen/atomics-inlining.c
+++ clang/test/CodeGen/atomics-inlining.c
@@ -8,6 +8,7 @@
 // RUN: %clang_cc1 -triple mipsisa64r6el-linux-gnuabi64 -emit-llvm %s -o - | FileCheck %s -check-prefix=MIPS64
 // RUN: %clang_cc1 -triple sparc-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV8 -check-prefix=SPARC
 // RUN: %clang_cc1 -triple sparcv9-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV9 -check-prefix=SPARC
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s -check-prefix=NVPTX
 
 unsigned char c1, c2;
 unsigned short s1, s2;
@@ -109,4 +110,17 @@
 // SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // SPARCV8: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
+
+// NVPTX-LABEL: define{{.*}} void @test1
+// NVPTX: = load atomic i8, ptr @c1 seq_cst, align 1
+// NVPTX: store atomic i8 {{.*}}, ptr @c1 seq_cst, align 1
+// NVPTX: = load atomic i16, ptr @s1 seq_cst, align 2
+// NVPTX: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
+// NVPTX: = load atomic i32, ptr @i1 seq_cst, align 4
+// NVPTX: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
+// NVPTX: = load atomic i64, ptr @ll1 seq_cst, align 8
+// NVPTX: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
+// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+
 }
Index: clang/lib/Basic/Targets/NVPTX.cpp
===
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -93,6 +93,8 @@
 default:
   llvm_unreachable("TargetPointerWidth mus

[PATCH] D146552: [Clang][OpenMP] Enable device-mapped constexpr class members to not be optimized out

2023-03-22 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 accepted this revision.
jhuber6 added a comment.

LG, thanks.

If you include `Fixes: <issue URL>` in the commit message it'll automatically 
close the issue, by the way.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146552/new/

https://reviews.llvm.org/D146552

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146552: [Clang][OpenMP] Enable device-mapped constexpr class members to not be optimized out

2023-03-21 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

We should have a clang test as well


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146552/new/

https://reviews.llvm.org/D146552

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146326: [LinkerWrapper] Do not extract globals with no offloading language

2023-03-17 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 506190.
jhuber6 added a comment.

Remove whitespace


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146326/new/

https://reviews.llvm.org/D146326

Files:
  clang/test/Driver/linker-wrapper-libs.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1162,7 +1162,8 @@
 
 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
 /// extract any symbols from it.
-Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, StringSaver &Saver,
+Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
+ StringSaver &Saver,
  DenseMap &Syms) {
   Expected IRSymtabOrErr = readIRSymtab(Buffer);
   if (!IRSymtabOrErr)
@@ -1182,9 +1183,10 @@
   ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
!Sym.isUndefined());
   // We will extract if it defines a new global symbol visible to the host.
+  // This is only necessary for code targeting an offloading language.
   bool NewGlobalSymbol =
   ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
-   !Sym.canBeOmittedFromSymbolTable() &&
+   !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
(Sym.getVisibility() != GlobalValue::HiddenVisibility));
   ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
 
@@ -1203,7 +1205,8 @@
 
 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
 /// any symbols from it.
-Expected getSymbolsFromObject(const ObjectFile &Obj, StringSaver &Saver,
+Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
+StringSaver &Saver,
 DenseMap &Syms) {
   bool ShouldExtract = false;
   for (SymbolRef Sym : Obj.symbols()) {
@@ -1228,9 +1231,11 @@
!(*FlagsOrErr & SymbolRef::SF_Undefined);
 
 // We will extract if it defines a new global symbol visible to the host.
-bool NewGlobalSymbol = ((NewSymbol || (OldSym & Sym_Undefined)) &&
-!(*FlagsOrErr & SymbolRef::SF_Undefined) &&
-!(*FlagsOrErr & SymbolRef::SF_Hidden));
+// This is only necessary for code targeting an offloading language.
+bool NewGlobalSymbol =
+((NewSymbol || (OldSym & Sym_Undefined)) &&
+ !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
+ !(*FlagsOrErr & SymbolRef::SF_Hidden));
 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
 
 // Update this symbol in the "table" with the new information.
@@ -1250,18 +1255,18 @@
 ///   1) It defines an undefined symbol in a regular object filie.
 ///   2) It defines a global symbol without hidden visibility that has not
 ///  yet been defined.
-Expected getSymbols(StringRef Image, StringSaver &Saver,
+Expected getSymbols(StringRef Image, OffloadKind Kind, StringSaver &Saver,
   DenseMap &Syms) {
   MemoryBufferRef Buffer = MemoryBufferRef(Image, "");
   switch (identify_magic(Image)) {
   case file_magic::bitcode:
-return getSymbolsFromBitcode(Buffer, Saver, Syms);
+return getSymbolsFromBitcode(Buffer, Kind, Saver, Syms);
   case file_magic::elf_relocatable: {
 Expected> ObjFile =
 ObjectFile::createObjectFile(Buffer);
 if (!ObjFile)
   return ObjFile.takeError();
-return getSymbolsFromObject(**ObjFile, Saver, Syms);
+return getSymbolsFromObject(**ObjFile, Kind, Saver, Syms);
   }
   default:
 return false;
@@ -1336,8 +1341,9 @@
 if (IsArchive && !WholeArchive && !Syms.count(Binary))
   continue;
 
-Expected ExtractOrErr =
-getSymbols(Binary.getBinary()->getImage(), Saver, Syms[Binary]);
+Expected ExtractOrErr = getSymbols(
+Binary.getBinary()->getImage(),
+Binary.getBinary()->getOffloadKind(), Saver, Syms[Binary]);
 if (!ExtractOrErr)
   return ExtractOrErr.takeError();
 
Index: clang/test/Driver/linker-wrapper-libs.c
===
--- clang/test/Driver/linker-wrapper-libs.c
+++ clang/test/Driver/linker-wrapper-libs.c
@@ -64,6 +64,29 @@
 // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
 // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o
 
+//
+// Check that we do not extract a global symbol if the source file was not
+// created by an offloading language that expects there to be a host version of
+// the symbol.
+/

[PATCH] D146326: [LinkerWrapper] Do not extract globals with no offloading language

2023-03-17 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, JonChesterfield, tra, yaxunl, 
tianshilei1992.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, jplehr, sstefan1.
Herald added a project: clang.

The linker wrapper needs to reinvent its own special static library
handling for static libraries containing fatbinaries. This is primarily
because offloading languages expect certain global symbols to be visible
to the host so we must consider them used symbols. However we should be
able to remove this requirement if we are linking in "freestanding" code
that was not created by an offloading language.

The motivation for this is to support the work-in-progress `libc` for
GPUs. It is provided as a static library with no offloading language
set. This logic lets us always import only the `libc` symbols that are used.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D146326

Files:
  clang/test/Driver/linker-wrapper-libs.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1162,7 +1162,8 @@
 
 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
 /// extract any symbols from it.
-Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, StringSaver &Saver,
+Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
+ StringSaver &Saver,
  DenseMap &Syms) {
   Expected IRSymtabOrErr = readIRSymtab(Buffer);
   if (!IRSymtabOrErr)
@@ -1182,9 +1183,10 @@
   ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
!Sym.isUndefined());
   // We will extract if it defines a new global symbol visible to the host.
+  // This is only necessary for code targeting an offloading language.
   bool NewGlobalSymbol =
   ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
-   !Sym.canBeOmittedFromSymbolTable() &&
+   !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
(Sym.getVisibility() != GlobalValue::HiddenVisibility));
   ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
 
@@ -1203,7 +1205,8 @@
 
 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
 /// any symbols from it.
-Expected getSymbolsFromObject(const ObjectFile &Obj, StringSaver &Saver,
+Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
+StringSaver &Saver,
 DenseMap &Syms) {
   bool ShouldExtract = false;
   for (SymbolRef Sym : Obj.symbols()) {
@@ -1225,12 +1228,15 @@
 // We will extract if it defines a currenlty undefined non-weak symbol.
 bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
!(OldSym & Sym_Weak) &&
+
!(*FlagsOrErr & SymbolRef::SF_Undefined);
 
 // We will extract if it defines a new global symbol visible to the host.
-bool NewGlobalSymbol = ((NewSymbol || (OldSym & Sym_Undefined)) &&
-!(*FlagsOrErr & SymbolRef::SF_Undefined) &&
-!(*FlagsOrErr & SymbolRef::SF_Hidden));
+// This is only necessary for code targeting an offloading language.
+bool NewGlobalSymbol =
+((NewSymbol || (OldSym & Sym_Undefined)) &&
+ !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
+ !(*FlagsOrErr & SymbolRef::SF_Hidden));
 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
 
 // Update this symbol in the "table" with the new information.
@@ -1250,18 +1256,18 @@
 ///   1) It defines an undefined symbol in a regular object filie.
 ///   2) It defines a global symbol without hidden visibility that has not
 ///  yet been defined.
-Expected getSymbols(StringRef Image, StringSaver &Saver,
+Expected getSymbols(StringRef Image, OffloadKind Kind, StringSaver &Saver,
   DenseMap &Syms) {
   MemoryBufferRef Buffer = MemoryBufferRef(Image, "");
   switch (identify_magic(Image)) {
   case file_magic::bitcode:
-return getSymbolsFromBitcode(Buffer, Saver, Syms);
+return getSymbolsFromBitcode(Buffer, Kind, Saver, Syms);
   case file_magic::elf_relocatable: {
 Expected> ObjFile =
 ObjectFile::createObjectFile(Buffer);
 if (!ObjFile)
   return ObjFile.takeError();
-return getSymbolsFromObject(**ObjFile, Saver, Syms);
+return getSymbolsFromObject(**ObjFile, Kind, Saver, Syms);
   }
   default:
 return false;
@@ -1336,8 +1342,9 @@
 if (IsArchive && !WholeArchive && !Syms.count(Binary))
   continue;
 
-Expected ExtractO

[PATCH] D145815: [Flang][Driver] Add support for fopenmp-is-device and fembed-offload-object to Flang ToolChain

2023-03-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 accepted this revision.
jhuber6 added a comment.

LGTM, it's much simpler for now since Flang doesn't support CUDA, HIP, OpenCL, 
OpenMP, etc.




Comment at: flang/test/Driver/omp-frontend-forwarding.f90:1
+! REQUIRES: amdgpu-registered-target
+

agozillon wrote:
> awarzynski wrote:
> > Given that you use `-###`, I think that this can be skipped (please double 
> > check).
> It does appear that it can be, at the very least I can swap in an NVIDIA 
> arch when I haven't configured the project to target it and it has no issues! 
> Thank you. 
I'm not completely familiar with Flang's status on this. Do we have tests in 
tree that perform the entire build and check `-ccc-print-bindings/phases` like 
we do in Clang?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145815/new/

https://reviews.llvm.org/D145815

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145815: [Flang][Driver] Add support for fopenmp-is-device and fembed-offload-object to Flang ToolChain

2023-03-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Flang.cpp:128
+  if (IsHostOffloadingAction) {
+for (size_t i = 1; i < Inputs.size(); ++i) {
+  if (Inputs[i].getType() != types::TY_Nothing)

agozillon wrote:
> jhuber6 wrote:
> > agozillon wrote:
> > > awarzynski wrote:
> > > > agozillon wrote:
> > > > > awarzynski wrote:
> > > > > > What's the magic "1"? And given that the input count matters here - 
> > > > > > is there a test with multiple inputs?
> > > > > It aims to mimic the behavior of Clang: 
> > > > > https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/Clang.cpp#L4561
> > > > >  where the main input is skipped (the input currently being compiled 
> > > > > or embedded into etc.), when adding to //-fembed-offload-object//. 
> > > > > 
> > > > > It does look different to Clang's as Clang has more cases and the 
> > > > > logic is spread across the constructJob invocation, but the first if 
> > > > > case is what the if statement inside of the loop and setting the loop 
> > > > > index variable to 1 do. The HostOffloadingInputs array is what is 
> > > > > being generated here, except we're skipping and directly applying it 
> > > > > as arguments.
> > > > > 
> > > > > I tried to condense it a little in this case! Perhaps it loses 
> > > > > readability though, I had hoped the comment might have kept it clear
> > > > Thanks for the link - that code in Clang doesn't really clarify what 
> > > > makes `Inputs[0]` special 🤔 . 
> > > > 
> > > > Let me rephrase my question - what's so special about the first input? 
> > > > (referred to in Clang as "main input") Is that something specific to 
> > > > OpenMP? For example, in this case:
> > > > ```
> > > > flang-new  -fopenmp  file.f90
> > > > ```
> > > > I assume that `inputs[0]` is "file.f90", so nothing will happen?
> > > > 
> > > > > I tried to condense it a little in this case! Perhaps it loses 
> > > > > readability though, I had hoped the comment might have kept it clear
> > > > 
> > > > Nah, I think that your implementation is fine. It's my ignorance with 
> > > > respect to OpenMP that's the problem here ;-)
> > > It's not specific to OpenMP I believe, as far as I am aware Clang's 
> > > supported offload models (SYCL and CUDA as well as OpenMP) use it! In 
> > > Flang's case we only really care about OpenMP as I believe it's the only 
> > > offload programming model supported.
> > > 
> > > In the case of the command: 
> > > 
> > > ```
> > > flang-new -fopenmp file.f90
> > > ``` 
> > > The code should never be executed as no part of the command will enable 
> > > an offloading action (for device or host)! But yes inputs[0] would indeed 
> > > refer to file.f90.
> > > 
> > > However, this code becomes relevant when you utilise an option that 
> > > enables the clangDriver to perform some form of offloading action. For 
> > > example a command like: 
> > > 
> > > ```
> > > flang-new -fopenmp --offload-arch=gfx90a file.f90 
> > > ```
> > > Will trigger two phase compilation, one for the host device (your 
> > > resident CPU in this command) and one for the device (gfx90a in this 
> > > command), the regular host pass will invoke like your provided command 
> > > and the device pass will then invoke with -fopenmp-is-device in addition 
> > > alongside the device triple. This generates two bitcode files from the 
> > > one file, one containing the host code from the file, the other the 
> > > device code (extracted from OpenMP target regions or declare target 
> > > etc.). 
> > > 
> > > However, now we have two files, with both parts of our program, we need 
> > > to conjoin them together, the clangDriver generates an embeddable 
> > > fat-binary/binary using the clang-offload-packager and then invokes 
> > > flang-new again, and this is where the above code becomes relevant, as 
> > > this binary (or multiple binaries, if we target multiple devices in the 
> > > same program) is what is passed to -fembed-offload-object! And inputs[0] 
> > > in this case would refer to the output from the original host pass, which 
> > > is what we want to embed the device binary into, so we wish to skip this 
> > > original host output and only pass the subsequent inputs (which should be 
> > > device binaries when the clangDriver initiates a host offloading action) 
> > > we want to embed as -fembed-offload-object arguments. 
> > > 
> > > The offloading driver is quite complex and my knowledge of it is not 
> > > perfect as I am not our resident expert on it unfortunately (so if anyone 
> > > sees anything incorrect, please do chime in and correct me)! 
> > > 
> > > But hopefully this answers your question and gives you an idea of when 
> > > and how this -fembed-offload-object comes into play, essentially a way to 
> > > get the device binaries we wish to insert into the host binary, so it can 
> > > load the binaries at runtime and execute them. Currently upstream Flang 
> > > doe

[PATCH] D145815: [Flang][Driver] Add support for fopenmp-is-device and fembed-offload-object to Flang ToolChain

2023-03-16 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Flang.cpp:128
+  if (IsHostOffloadingAction) {
+for (size_t i = 1; i < Inputs.size(); ++i) {
+  if (Inputs[i].getType() != types::TY_Nothing)

agozillon wrote:
> awarzynski wrote:
> > agozillon wrote:
> > > awarzynski wrote:
> > > > What's the magic "1"? And given that the input count matters here - is 
> > > > there a test with multiple inputs?
> > > It aims to mimic the behavior of Clang: 
> > > https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/Clang.cpp#L4561
> > >  where the main input is skipped (the input currently being compiled or 
> > > embedded into etc.), when adding to //-fembed-offload-object//. 
> > > 
> > > It does look different to Clang's as Clang has more cases and the logic 
> > > is spread across the constructJob invocation, but the first if case is 
> > > what the if statement inside of the loop and setting the loop index 
> > > variable to 1 do. The HostOffloadingInputs array is what is being 
> > > generated here, except we're skipping and directly applying it as 
> > > arguments.
> > > 
> > > I tried to condense it a little in this case! Perhaps it loses 
> > > readability though, I had hoped the comment might have kept it clear
> > Thanks for the link - that code in Clang doesn't really clarify what makes 
> > `Inputs[0]` special 🤔 . 
> > 
> > Let me rephrase my question - what's so special about the first input? 
> > (referred to in Clang as "main input") Is that something specific to 
> > OpenMP? For example, in this case:
> > ```
> > flang-new  -fopenmp  file.f90
> > ```
> > I assume that `inputs[0]` is "file.f90", so nothing will happen?
> > 
> > > I tried to condense it a little in this case! Perhaps it loses 
> > > readability though, I had hoped the comment might have kept it clear
> > 
> > Nah, I think that your implementation is fine. It's my ignorance with 
> > respect to OpenMP that's the problem here ;-)
> It's not specific to OpenMP I believe, as far as I am aware Clang's supported 
> offload models (SYCL and CUDA as well as OpenMP) use it! In Flang's case we 
> only really care about OpenMP as I believe it's the only offload programming 
> model supported.
> 
> In the case of the command: 
> 
> ```
> flang-new -fopenmp file.f90
> ``` 
> The code should never be executed as no part of the command will enable an 
> offloading action (for device or host)! But yes inputs[0] would indeed refer 
> to file.f90.
> 
> However, this code becomes relevant when you utilise an option that enables 
> the clangDriver to perform some form of offloading action. For example a 
> command like: 
> 
> ```
> flang-new -fopenmp --offload-arch=gfx90a file.f90 
> ```
> Will trigger two phase compilation, one for the host device (your resident 
> CPU in this command) and one for the device (gfx90a in this command), the 
> regular host pass will invoke like your provided command and the device pass 
> will then invoke with -fopenmp-is-device in addition alongside the device 
> triple. This generates two bitcode files from the one file, one containing 
> the host code from the file, the other the device code (extracted from OpenMP 
> target regions or declare target etc.). 
> 
> However, now we have two files, with both parts of our program, we need to 
> conjoin them together, the clangDriver generates an embeddable 
> fat-binary/binary using the clang-offload-packager and then invokes flang-new 
> again, and this is where the above code becomes relevant, as this binary (or 
> multiple binaries, if we target multiple devices in the same program) is what 
> is passed to -fembed-offload-object! And inputs[0] in this case would refer 
> to the output from the original host pass, which is what we want to embed the 
> device binary into, so we wish to skip this original host output and only 
> pass the subsequent inputs (which should be device binaries when the 
> clangDriver initiates a host offloading action) we want to embed as 
> -fembed-offload-object arguments. 
> 
> The offloading driver is quite complex and my knowledge of it is not perfect 
> as I am not our resident expert on it unfortunately (so if anyone sees 
> anything incorrect, please do chime in and correct me)! 
> 
> But hopefully this answers your question and gives you an idea of when and 
> how this -fembed-offload-object comes into play, essentially a way to get the 
> device binaries we wish to insert into the host binary, so it can load the 
> binaries at runtime and execute them. Currently upstream Flang doesn't 
> utilise this option of course, but we intend to use this as part of our 
> OpenMP offloading efforts for AMD devices (whilst leaving the door open for 
> other vendors devices as well). We are trying to re-use/mimic as much of the 
> existing machinery that the clangDriver implements as we can! 
>  
The compiler requires at least one input file to run; otherwise, it exits early.

[PATCH] D144569: [Clang][OpenMP] Fix accessing of aligned arrays in offloaded target regions

2023-03-15 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 accepted this revision.
jhuber6 added a comment.
This revision is now accepted and ready to land.
Herald added a subscriber: jplehr.

Pretty sure this is what we discussed in the meeting, so it should be fine to 
remove the alignment requirement on the declaration, since we don't handle it 
properly by reference either.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144569/new/

https://reviews.llvm.org/D144569

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145941: [Clang] Always use --no-undefined when linking AMDGPU images

2023-03-14 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG55f38495e38e: [Clang] Always use --no-undefined when linking 
AMDGPU images (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D145941?vs=505166&id=505185#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145941/new/

https://reviews.llvm.org/D145941

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/test/Driver/amdgpu-toolchain-opencl.cl
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -6,7 +6,7 @@
 // RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | 
FileCheck -check-prefix=DWARF_VER %s
 
 // AS_LINK: "-cc1as"
-// AS_LINK: ld.lld{{.*}} "-shared"
+// AS_LINK: ld.lld{{.*}} "--no-undefined" "-shared"
 
 // DWARF_VER: "-dwarf-version=5"
 
Index: clang/test/Driver/amdgpu-toolchain-opencl.cl
===
--- clang/test/Driver/amdgpu-toolchain-opencl.cl
+++ clang/test/Driver/amdgpu-toolchain-opencl.cl
@@ -25,3 +25,6 @@
 
 // CHK-INC: "-cc1" {{.*}}"-finclude-default-header" 
"-fdeclare-opencl-builtins" {{.*}}"-x" "cl"
 // CHK-INC-NOT: "-cc1" {{.*}}"-finclude-default-header" 
"-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output"
+
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -emit-llvm 
-mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-LINK %s
+// CHK-LINK: ld.lld{{.*}} "--no-undefined" "-shared"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -543,6 +543,7 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -231,6 +231,14 @@
 Target Specific Changes
 ---
 
+AMDGPU Support
+^^
+
+- Linking for AMDGPU now uses ``--no-undefined`` by default. This causes
+  undefined symbols in the created module to be a linker error. To prevent 
this,
+  pass ``-Wl,--undefined`` if compiling directly, or ``-Xoffload-linker
+  --undefined`` if using an offloading language.
+
 X86 Support
 ^^^
 


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -6,7 +6,7 @@
 // RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
 
 // AS_LINK: "-cc1as"
-// AS_LINK: ld.lld{{.*}} "-shared"
+// AS_LINK: ld.lld{{.*}} "--no-undefined" "-shared"
 
 // DWARF_VER: "-dwarf-version=5"
 
Index: clang/test/Driver/amdgpu-toolchain-opencl.cl
===
--- clang/test/Driver/amdgpu-toolchain-opencl.cl
+++ clang/test/Driver/amdgpu-toolchain-opencl.cl
@@ -25,3 +25,6 @@
 
 // CHK-INC: "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cl"
 // CHK-INC-NOT: "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output"
+
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-LINK %s
+// CHK-LINK: ld.lld{{.*}} "--no-undefined" "-shared"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -543,6 +543,7 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -231,6 +231,14 @@
 Target Specific Changes
 ---
 
+AMDGPU Support
+^^
+
+- Linking for AMDGPU now uses ``--no-undefined`` by default. This causes
+  undefined symbols in the created module to be a linker error. To prevent this,
+  pass ``-Wl,--undefined`` if compiling directly, or ``-Xoffload-linker
+ 

[PATCH] D145941: [Clang] Always use -zdefs when linking AMDGPU images

2023-03-14 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 505166.
jhuber6 added a comment.

Adding release notes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145941/new/

https://reviews.llvm.org/D145941

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/test/Driver/amdgpu-toolchain-opencl.cl
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -6,7 +6,7 @@
 // RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | 
FileCheck -check-prefix=DWARF_VER %s
 
 // AS_LINK: "-cc1as"
-// AS_LINK: ld.lld{{.*}} "-shared"
+// AS_LINK: ld.lld{{.*}} "--no-undefined" "-shared"
 
 // DWARF_VER: "-dwarf-version=5"
 
Index: clang/test/Driver/amdgpu-toolchain-opencl.cl
===
--- clang/test/Driver/amdgpu-toolchain-opencl.cl
+++ clang/test/Driver/amdgpu-toolchain-opencl.cl
@@ -25,3 +25,6 @@
 
 // CHK-INC: "-cc1" {{.*}}"-finclude-default-header" 
"-fdeclare-opencl-builtins" {{.*}}"-x" "cl"
 // CHK-INC-NOT: "-cc1" {{.*}}"-finclude-default-header" 
"-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output"
+
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -emit-llvm 
-mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-LINK %s
+// CHK-LINK: ld.lld{{.*}} "--no-undefined" "-shared"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -543,6 +543,7 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -222,6 +222,14 @@
 Target Specific Changes
 ---
 
+AMDGPU Support
+^^
+
+- Linking for AMDGPU now uses ``--no-undefined`` by default. This causes 
+  undefined symbols in the created module to be a linker error. To prevent 
this, 
+  pass ``-Wl,--undefined`` if compiling directly, or ``-Xoffload-linker 
+  --undefined`` if using an offloading language.
+
 X86 Support
 ^^^
 


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -6,7 +6,7 @@
 // RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
 
 // AS_LINK: "-cc1as"
-// AS_LINK: ld.lld{{.*}} "-shared"
+// AS_LINK: ld.lld{{.*}} "--no-undefined" "-shared"
 
 // DWARF_VER: "-dwarf-version=5"
 
Index: clang/test/Driver/amdgpu-toolchain-opencl.cl
===
--- clang/test/Driver/amdgpu-toolchain-opencl.cl
+++ clang/test/Driver/amdgpu-toolchain-opencl.cl
@@ -25,3 +25,6 @@
 
 // CHK-INC: "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cl"
 // CHK-INC-NOT: "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output"
+
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-LINK %s
+// CHK-LINK: ld.lld{{.*}} "--no-undefined" "-shared"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -543,6 +543,7 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -222,6 +222,14 @@
 Target Specific Changes
 ---
 
+AMDGPU Support
+^^
+
+- Linking for AMDGPU now uses ``--no-undefined`` by default. This causes 
+  undefined symbols in the created module to be a linker error. To prevent this, 
+  pass ``-Wl,--undefined`` if compiling directly, or ``-Xoffload-linker 
+  --undefined`` if using an offloading language.
+
 X86 Support
 ^^^
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-com

[PATCH] D145941: [Clang] Always use -zdefs when linking AMDGPU images

2023-03-13 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 504818.
jhuber6 added a comment.

Use `--no-undefined` to be consistent with HIP and check for OpenCL.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145941/new/

https://reviews.llvm.org/D145941

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/test/Driver/amdgpu-toolchain-opencl.cl
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -6,7 +6,7 @@
 // RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | 
FileCheck -check-prefix=DWARF_VER %s
 
 // AS_LINK: "-cc1as"
-// AS_LINK: ld.lld{{.*}} "-shared"
+// AS_LINK: ld.lld{{.*}} "--no-undefined" "-shared"
 
 // DWARF_VER: "-dwarf-version=5"
 
Index: clang/test/Driver/amdgpu-toolchain-opencl.cl
===
--- clang/test/Driver/amdgpu-toolchain-opencl.cl
+++ clang/test/Driver/amdgpu-toolchain-opencl.cl
@@ -25,3 +25,6 @@
 
 // CHK-INC: "-cc1" {{.*}}"-finclude-default-header" 
"-fdeclare-opencl-builtins" {{.*}}"-x" "cl"
 // CHK-INC-NOT: "-cc1" {{.*}}"-finclude-default-header" 
"-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output"
+
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -emit-llvm 
-mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-LINK %s
+// CHK-LINK: ld.lld{{.*}} "--no-undefined" "-shared"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -543,6 +543,7 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -6,7 +6,7 @@
 // RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
 
 // AS_LINK: "-cc1as"
-// AS_LINK: ld.lld{{.*}} "-shared"
+// AS_LINK: ld.lld{{.*}} "--no-undefined" "-shared"
 
 // DWARF_VER: "-dwarf-version=5"
 
Index: clang/test/Driver/amdgpu-toolchain-opencl.cl
===
--- clang/test/Driver/amdgpu-toolchain-opencl.cl
+++ clang/test/Driver/amdgpu-toolchain-opencl.cl
@@ -25,3 +25,6 @@
 
 // CHK-INC: "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cl"
 // CHK-INC-NOT: "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output"
+
+// RUN: %clang -### --target=amdgcn-amd-amdhsa-opencl -x cl -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-LINK %s
+// CHK-LINK: ld.lld{{.*}} "--no-undefined" "-shared"
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -543,6 +543,7 @@
   if (C.getDriver().isUsingLTO())
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
+  CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145862: [LinkerWrapper] Switch to add_clang_tool() macro

2023-03-13 Thread Joseph Huber via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc2aabcfc8395: [LinkerWrapper] Switch to add_clang_tool() 
macro (authored by foutrelis, committed by jhuber6).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145862/new/

https://reviews.llvm.org/D145862

Files:
  clang/tools/clang-linker-wrapper/CMakeLists.txt


Index: clang/tools/clang-linker-wrapper/CMakeLists.txt
===
--- clang/tools/clang-linker-wrapper/CMakeLists.txt
+++ clang/tools/clang-linker-wrapper/CMakeLists.txt
@@ -1,5 +1,3 @@
-include(GNUInstallDirs)
-
 set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
   BitWriter
@@ -27,7 +25,7 @@
   set(tablegen_deps intrinsics_gen LinkerWrapperOpts)
 endif()
 
-add_clang_executable(clang-linker-wrapper
+add_clang_tool(clang-linker-wrapper
   ClangLinkerWrapper.cpp
   OffloadWrapper.cpp
 
@@ -43,5 +41,3 @@
   PRIVATE
   ${CLANG_LINKER_WRAPPER_LIB_DEPS}
   )
-
-install(TARGETS clang-linker-wrapper RUNTIME DESTINATION 
"${CMAKE_INSTALL_BINDIR}")


Index: clang/tools/clang-linker-wrapper/CMakeLists.txt
===
--- clang/tools/clang-linker-wrapper/CMakeLists.txt
+++ clang/tools/clang-linker-wrapper/CMakeLists.txt
@@ -1,5 +1,3 @@
-include(GNUInstallDirs)
-
 set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
   BitWriter
@@ -27,7 +25,7 @@
   set(tablegen_deps intrinsics_gen LinkerWrapperOpts)
 endif()
 
-add_clang_executable(clang-linker-wrapper
+add_clang_tool(clang-linker-wrapper
   ClangLinkerWrapper.cpp
   OffloadWrapper.cpp
 
@@ -43,5 +41,3 @@
   PRIVATE
   ${CLANG_LINKER_WRAPPER_LIB_DEPS}
   )
-
-install(TARGETS clang-linker-wrapper RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145944: [Clang] Add --version and --help messages to amdgpu/nvptx-arch

2023-03-13 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGa26aabefe535: [Clang] Add --version and --help messages to 
amdgpu/nvptx-arch (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145944/new/

https://reviews.llvm.org/D145944

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt
  clang/tools/nvptx-arch/CMakeLists.txt
  clang/tools/nvptx-arch/NVPTXArch.cpp

Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -11,12 +11,25 @@
 //
 //===--===//
 
+#include "clang/Basic/Version.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Error.h"
 #include 
 #include 
 #include 
 
+using namespace llvm;
+
+static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+static void PrintVersion(raw_ostream &OS) {
+  OS << clang::getClangToolFullVersion("nvptx-arch") << '\n';
+}
+// Mark all our options with this category, everything else (except for -version
+// and -help) will be hidden.
+static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
+
 #if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
@@ -79,6 +92,21 @@
 }
 
 int main(int argc, char *argv[]) {
+  cl::HideUnrelatedOptions(NVPTXArchCategory);
+
+  cl::SetVersionPrinter(PrintVersion);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A tool to detect the presence of NVIDIA devices on the system. \n\n"
+  "The tool will output each detected GPU architecture separated by a\n"
+  "newline character. If multiple GPUs of the same architecture are found\n"
+  "a string will be printed for each\n");
+
+  if (Help) {
+cl::PrintHelpMessage();
+return 0;
+  }
+
   // Attempt to load the NVPTX driver runtime.
   if (llvm::Error Err = loadCUDA()) {
 logAllUnhandledErrors(std::move(Err), llvm::errs());
Index: clang/tools/nvptx-arch/CMakeLists.txt
===
--- clang/tools/nvptx-arch/CMakeLists.txt
+++ clang/tools/nvptx-arch/CMakeLists.txt
@@ -14,6 +14,7 @@
 # If we found the CUDA library directly we just dynamically link against it.
 if(CUDAToolkit_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 4))
   target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver)
+  clang_target_link_libraries(nvptx-arch PRIVATE clangBasic)
 else()
   target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
 endif()
Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- clang/tools/amdgpu-arch/CMakeLists.txt
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -15,6 +15,7 @@
 if(hsa-runtime64_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 4))
   set_target_properties(amdgpu-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
   target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
+  clang_target_link_libraries(amdgpu-arch PRIVATE clangBasic)
 else()
   target_compile_definitions(amdgpu-arch PRIVATE "DYNAMIC_HSA")
 endif()
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,12 +11,25 @@
 //
 //===--===//
 
+#include "clang/Basic/Version.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Error.h"
 #include 
 #include 
 #include 
 
+using namespace llvm;
+
+static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Mark all our options with this category.
+static cl::OptionCategory AMDGPUArchCategory("amdgpu-arch options");
+
+static void PrintVersion(raw_ostream &OS) {
+  OS << clang::getClangToolFullVersion("amdgpu-arch") << '\n';
+}
+
 #if DYNAMIC_HSA
 typedef enum {
   HSA_STATUS_SUCCESS = 0x0,
@@ -102,6 +115,21 @@
 }
 
 int main(int argc, char *argv[]) {
+  cl::HideUnrelatedOptions(AMDGPUArchCategory);
+
+  cl::SetVersionPrinter(PrintVersion);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A tool to detect the presence of AMDGPU devices on the system. \n\n"
+  "The tool will output each detected GPU architecture separated by a\n"
+  "newline character. If multiple GPUs of the same architecture are found\n"
+  "a string will be printed for each\n");
+
+  if (Help) {
+cl::PrintHelpMessage();
+return 0;
+  }
+
   // Attempt to load the HSA runtime.
   if (llvm::Error Err = loadHSA()) {
 logAllUnhandledErrors(std::move(Err), llvm::errs());
_

[PATCH] D145941: [Clang] Always use -zdefs when linking AMDGPU images

2023-03-13 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

This can be turned off with `-zundefs`. So we could instruct people to use 
`-Wl,-zundefs` or `-Xoffload-linker -zundefs` if the old behavior is desired.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145941/new/

https://reviews.llvm.org/D145941

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145944: [Clang] Add --version and --help messages to amdgpu/nvptx-arch

2023-03-13 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 504670.
jhuber6 added a comment.

Add help print.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145944/new/

https://reviews.llvm.org/D145944

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt
  clang/tools/nvptx-arch/CMakeLists.txt
  clang/tools/nvptx-arch/NVPTXArch.cpp

Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -11,12 +11,25 @@
 //
 //===--===//
 
+#include "clang/Basic/Version.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Error.h"
 #include 
 #include 
 #include 
 
+using namespace llvm;
+
+static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+static void PrintVersion(raw_ostream &OS) {
+  OS << clang::getClangToolFullVersion("nvptx-arch") << '\n';
+}
+// Mark all our options with this category, everything else (except for -version
+// and -help) will be hidden.
+static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
+
 #if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
@@ -79,6 +92,21 @@
 }
 
 int main(int argc, char *argv[]) {
+  cl::HideUnrelatedOptions(NVPTXArchCategory);
+
+  cl::SetVersionPrinter(PrintVersion);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A tool to detect the presence of NVIDIA devices on the system. \n\n"
+  "The tool will output each detected GPU architecture separated by a\n"
+  "newline character. If multiple GPUs of the same architecture are found\n"
+  "a string will be printed for each\n");
+
+  if (Help) {
+cl::PrintHelpMessage();
+return 0;
+  }
+
   // Attempt to load the NVPTX driver runtime.
   if (llvm::Error Err = loadCUDA()) {
 logAllUnhandledErrors(std::move(Err), llvm::errs());
Index: clang/tools/nvptx-arch/CMakeLists.txt
===
--- clang/tools/nvptx-arch/CMakeLists.txt
+++ clang/tools/nvptx-arch/CMakeLists.txt
@@ -14,6 +14,7 @@
 # If we found the CUDA library directly we just dynamically link against it.
 if(CUDAToolkit_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 4))
   target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver)
+  clang_target_link_libraries(nvptx-arch PRIVATE clangBasic)
 else()
   target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
 endif()
Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- clang/tools/amdgpu-arch/CMakeLists.txt
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -15,6 +15,7 @@
 if(hsa-runtime64_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 4))
   set_target_properties(amdgpu-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
   target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
+  clang_target_link_libraries(amdgpu-arch PRIVATE clangBasic)
 else()
   target_compile_definitions(amdgpu-arch PRIVATE "DYNAMIC_HSA")
 endif()
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,12 +11,25 @@
 //
 //===--===//
 
+#include "clang/Basic/Version.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Error.h"
 #include 
 #include 
 #include 
 
+using namespace llvm;
+
+static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Mark all our options with this category.
+static cl::OptionCategory AMDGPUArchCategory("amdgpu-arch options");
+
+static void PrintVersion(raw_ostream &OS) {
+  OS << clang::getClangToolFullVersion("amdgpu-arch") << '\n';
+}
+
 #if DYNAMIC_HSA
 typedef enum {
   HSA_STATUS_SUCCESS = 0x0,
@@ -102,6 +115,21 @@
 }
 
 int main(int argc, char *argv[]) {
+  cl::HideUnrelatedOptions(AMDGPUArchCategory);
+
+  cl::SetVersionPrinter(PrintVersion);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A tool to detect the presence of AMDGPU devices on the system. \n\n"
+  "The tool will output each detected GPU architecture separated by a\n"
+  "newline character. If multiple GPUs of the same architecture are found\n"
+  "a string will be printed for each\n");
+
+  if (Help) {
+cl::PrintHelpMessage();
+return 0;
+  }
+
   // Attempt to load the HSA runtime.
   if (llvm::Error Err = loadHSA()) {
 logAllUnhandledErrors(std::move(Err), llvm::errs());
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145944: [Clang] Add --version and --help messages to amdgpu/nvptx-arch

2023-03-13 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, tianshilei1992, JonChesterfield, ye-luo.
Herald added subscribers: kosarev, mattd, gchakrabarti, asavonic, kerbowa, tpr, 
dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, wdng, jholewinski.
Herald added a project: clang.

Summary:
These clang tools should print some basic help and version messages so
they are less opaque.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D145944

Files:
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt
  clang/tools/nvptx-arch/CMakeLists.txt
  clang/tools/nvptx-arch/NVPTXArch.cpp

Index: clang/tools/nvptx-arch/NVPTXArch.cpp
===
--- clang/tools/nvptx-arch/NVPTXArch.cpp
+++ clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -11,12 +11,25 @@
 //
 //===--===//
 
+#include "clang/Basic/Version.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Error.h"
 #include 
 #include 
 #include 
 
+using namespace llvm;
+
+static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+static void PrintVersion(raw_ostream &OS) {
+  OS << clang::getClangToolFullVersion("nvptx-arch") << '\n';
+}
+// Mark all our options with this category, everything else (except for -version
+// and -help) will be hidden.
+static cl::OptionCategory NVPTXArchCategory("nvptx-arch options");
+
 #if DYNAMIC_CUDA
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
@@ -79,6 +92,16 @@
 }
 
 int main(int argc, char *argv[]) {
+  cl::HideUnrelatedOptions(NVPTXArchCategory);
+
+  cl::SetVersionPrinter(PrintVersion);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A tool to detect the presence of NVIDIA devices on the system. \n\n"
+  "The tool will output each detected GPU architecture separated by a\n"
+  "newline character. If multiple GPUs of the same architecture are found\n"
+  "a string will be printed for each\n");
+
   // Attempt to load the NVPTX driver runtime.
   if (llvm::Error Err = loadCUDA()) {
 logAllUnhandledErrors(std::move(Err), llvm::errs());
Index: clang/tools/nvptx-arch/CMakeLists.txt
===
--- clang/tools/nvptx-arch/CMakeLists.txt
+++ clang/tools/nvptx-arch/CMakeLists.txt
@@ -14,6 +14,7 @@
 # If we found the CUDA library directly we just dynamically link against it.
 if(CUDAToolkit_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 4))
   target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver)
+  clang_target_link_libraries(nvptx-arch PRIVATE clangBasic)
 else()
   target_compile_definitions(nvptx-arch PRIVATE "DYNAMIC_CUDA")
 endif()
Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- clang/tools/amdgpu-arch/CMakeLists.txt
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -15,6 +15,7 @@
 if(hsa-runtime64_FOUND AND NOT (LLVM_BUILD_32_BITS OR CMAKE_SIZEOF_VOID_P EQUAL 4))
   set_target_properties(amdgpu-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
   target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
+  clang_target_link_libraries(amdgpu-arch PRIVATE clangBasic)
 else()
   target_compile_definitions(amdgpu-arch PRIVATE "DYNAMIC_HSA")
 endif()
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- clang/tools/amdgpu-arch/AMDGPUArch.cpp
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -11,12 +11,25 @@
 //
 //===--===//
 
+#include "clang/Basic/Version.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Error.h"
 #include 
 #include 
 #include 
 
+using namespace llvm;
+
+static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Mark all our options with this category.
+static cl::OptionCategory AMDGPUArchCategory("amdgpu-arch options");
+
+static void PrintVersion(raw_ostream &OS) {
+  OS << clang::getClangToolFullVersion("amdgpu-arch") << '\n';
+}
+
 #if DYNAMIC_HSA
 typedef enum {
   HSA_STATUS_SUCCESS = 0x0,
@@ -102,6 +115,16 @@
 }
 
 int main(int argc, char *argv[]) {
+  cl::HideUnrelatedOptions(AMDGPUArchCategory);
+
+  cl::SetVersionPrinter(PrintVersion);
+  cl::ParseCommandLineOptions(
+  argc, argv,
+  "A tool to detect the presence of AMDGPU devices on the system. \n\n"
+  "The tool will output each detected GPU architecture separated by a\n"
+  "newline character. If multiple GPUs of the same architecture are found\n"
+  "a string will be printed for each\n");
+
   // Attempt to load the HSA runtime.
   if (llvm::Error Err = loadHSA()) {
 logAllUnhandledErrors(std::move(Err), llvm::errs())

[PATCH] D145941: [Clang] Always use -zdefs when linking AMDGPU images

2023-03-13 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, JonChesterfield, arsenm, yaxunl, MaskRay.
Herald added subscribers: kosarev, kerbowa, tpr, dstuttard, jvesely, kzhuravl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, wdng.
Herald added a project: clang.

AMDGPU uses ELF shared libraries to implement their executable device
images. One downside to this method is that it disables regular warnings
on undefined symbols. This is because shared libraries expect these to
be resolved by later loads. However, the GPU images do not support
dynamic linking so any undefined symbol is going to cause a runtime
error. This patch adds `-zdefs` to the `ld.lld` invocation to guarantee
that undefined symbols are always caught as linking errors rather than
runtime errors.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D145941

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/test/Driver/amdgpu-toolchain.c


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -6,7 +6,7 @@
 // RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | 
FileCheck -check-prefix=DWARF_VER %s
 
 // AS_LINK: "-cc1as"
-// AS_LINK: ld.lld{{.*}} "-shared"
+// AS_LINK: ld.lld{{.*}} "-shared" "-zdefs"
 
 // DWARF_VER: "-dwarf-version=5"
 
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -544,6 +544,7 @@
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
   CmdArgs.push_back("-shared");
+  CmdArgs.push_back("-zdefs");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
   C.addCommand(std::make_unique(


Index: clang/test/Driver/amdgpu-toolchain.c
===
--- clang/test/Driver/amdgpu-toolchain.c
+++ clang/test/Driver/amdgpu-toolchain.c
@@ -6,7 +6,7 @@
 // RUN: %clang -### -g --target=amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
 
 // AS_LINK: "-cc1as"
-// AS_LINK: ld.lld{{.*}} "-shared"
+// AS_LINK: ld.lld{{.*}} "-shared" "-zdefs"
 
 // DWARF_VER: "-dwarf-version=5"
 
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -544,6 +544,7 @@
 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
   C.getDriver().getLTOMode() == LTOK_Thin);
   CmdArgs.push_back("-shared");
+  CmdArgs.push_back("-zdefs");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
   C.addCommand(std::make_unique(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145820: Insert alloca for kernel args at function entry block instead of the launch point.

2023-03-10 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

No tests updated?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145820/new/

https://reviews.llvm.org/D145820

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145591: [clang][HIP][OpenMP] Add warning if mixed HIP / OpenMP offloading

2023-03-09 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D145591#4182748 , @yaxunl wrote:

> In D145591#4182360 , @jhuber6 wrote:
>
>> I'm not a fan of the same warning being copied in 24 places. Why do we set 
>> `LangOpts.IsOpenMP` on the GPU compilation side, couldn't we just filter out 
>> the `-fopenmp` or whatever it is for the HIP job?
>
> We cannot filter out `-fopenmp` for HIP job because the host code in HIP 
> program needs it to support "omp parallel for" etc. Filtering it will break 
> existing HIP programs.

I mean, couldn't we just prevent the `-cc1` arguments for the HIP device 
compilation from using any OpenMP? Or would that be breaking? I figured it was only 
supported for the CPU portion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145591/new/

https://reviews.llvm.org/D145591

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145591: [clang][HIP][OpenMP] Add warning if mixed HIP / OpenMP offloading

2023-03-09 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

I'm not a fan of the same warning being copied in 24 places. Why do we set 
`LangOpts.IsOpenMP` on the GPU compilation side, couldn't we just filter out 
the `-fopenmp` or whatever it is for the HIP job?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145591/new/

https://reviews.llvm.org/D145591

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D129507: [OffloadPackager] Add option to extract files from images

2023-03-04 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/test/Driver/offload-packager.c:2-3
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
+// UNSUPPORTED: system-windows

bader wrote:
> Are nvptx and amdgpu target required for this test?
> Latest version of the test invokes clang only for x86 target and 
> clang-offload-packager just adds triple as metadata string without using llvm 
> target. Right?
You're right, we could get rid of those. Also this made me realize that the 
test for the bitcode file isn't actually using bitcode, I should probably fix 
that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D129507/new/

https://reviews.llvm.org/D129507

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144884: [clang-format] Only add pragma continuation indentation for 'omp' clauses

2023-03-02 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

In D144884#4165192 , @MyDeveloperDay 
wrote:

> without this change what does this look like?
>
>   EXPECT_EQ(
>   "#pragma omp target   \\\n"
>   "reduction(+ : var)   \\\n"
>   "map(to : A[0 : N])   \\\n"
>   "map(to : B[0 : N])   \\\n"
>   "map(from : C[0 : N]) \\\n"
>   "firstprivate(i)  \\\n"
>   "firstprivate(j)  \\\n"
>   "firstprivate(k)",
>   format(
>   "#pragma omp target reduction(+:var) map(to:A[0:N]) map(to:B[0:N]) "
>   "map(from:C[0:N]) firstprivate(i) firstprivate(j) firstprivate(k)",
>   getLLVMStyleWithColumns(26)));

Like this without the code in the previous patch

  #pragma omp target reduction( \
  + : var) map(to : A   \
   [0 : N]) \
  map(to : B[0 : N]) map(   \
  from : C[0 : N])  \
  firstprivate(i)   \
  firstprivate( \
  j)\
  firstprivate( \
  k)

Like this if we just return the current indent without any extra indentation:

  #pragma omp target   \\
  reduction(+ : var)   \\
  map(to : A[0 : N])   \\
  map(to : B[0 : N])   \\
  map(from : C[0 : N]) \\
  firstprivate(i)  \\
  firstprivate(j)  \\
  firstprivate(k)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144884/new/

https://reviews.llvm.org/D144884

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144884: [clang-format] Only add pragma continuation indentation for 'omp' clauses

2023-03-02 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Format/ContinuationIndenter.cpp:1280
+FormatToken *PragmaType = State.Line->First->Next->Next;
+if (PragmaType && PragmaType->TokenText.equals("omp"))
+  return CurrentState.Indent + Style.ContinuationIndentWidth;

MyDeveloperDay wrote:
> can you add a test that covers this?
There is already a test for the `omp` case and this patch added a new one for 
the non-omp case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144884/new/

https://reviews.llvm.org/D144884

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144884: [clang-format] Only add pragma continuation indentation for 'omp' clauses

2023-02-28 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG466b4327f8fc: [clang-format] Only add pragma continuation 
indentation for 'omp' clauses (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144884/new/

https://reviews.llvm.org/D144884

Files:
  clang/lib/Format/ContinuationIndenter.cpp
  clang/unittests/Format/FormatTest.cpp


Index: clang/unittests/Format/FormatTest.cpp
===
--- clang/unittests/Format/FormatTest.cpp
+++ clang/unittests/Format/FormatTest.cpp
@@ -20560,6 +20560,21 @@
 "(including parentheses).",
 format("#pragmamark   Any non-hyphenated or hyphenated string "
"(including parentheses)."));
+
+  EXPECT_EQ("#pragma mark Any non-hyphenated or hyphenated string "
+"(including parentheses).",
+format("#pragmamark   Any non-hyphenated or hyphenated string "
+   "(including parentheses)."));
+
+  EXPECT_EQ(
+  "#pragma comment(linker,\\\n"
+  "\"argument\" \\\n"
+  "\"argument\"",
+  format("#pragma comment(linker,  \\\n"
+ " \"argument\" \\\n"
+ " \"argument\"",
+ getStyleWithColumns(
+ getChromiumStyle(FormatStyle::LanguageKind::LK_Cpp), 32)));
 }
 
 TEST_F(FormatTest, UnderstandsPragmaOmpTarget) {
Index: clang/lib/Format/ContinuationIndenter.cpp
===
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -1273,8 +1273,13 @@
 return ContinuationIndent;
   }
 
-  if (State.Line->InPragmaDirective)
-return CurrentState.Indent + Style.ContinuationIndentWidth;
+  // OpenMP clauses want to get additional indentation when they are pushed 
onto
+  // the next line.
+  if (State.Line->InPragmaDirective) {
+FormatToken *PragmaType = State.Line->First->Next->Next;
+if (PragmaType && PragmaType->TokenText.equals("omp"))
+  return CurrentState.Indent + Style.ContinuationIndentWidth;
+  }
 
   // This ensure that we correctly format ObjC methods calls without inputs,
   // i.e. where the last element isn't selector like: [callee method];


Index: clang/unittests/Format/FormatTest.cpp
===
--- clang/unittests/Format/FormatTest.cpp
+++ clang/unittests/Format/FormatTest.cpp
@@ -20560,6 +20560,21 @@
 "(including parentheses).",
 format("#pragmamark   Any non-hyphenated or hyphenated string "
"(including parentheses)."));
+
+  EXPECT_EQ("#pragma mark Any non-hyphenated or hyphenated string "
+"(including parentheses).",
+format("#pragmamark   Any non-hyphenated or hyphenated string "
+   "(including parentheses)."));
+
+  EXPECT_EQ(
+  "#pragma comment(linker,\\\n"
+  "\"argument\" \\\n"
+  "\"argument\"",
+  format("#pragma comment(linker,  \\\n"
+ " \"argument\" \\\n"
+ " \"argument\"",
+ getStyleWithColumns(
+ getChromiumStyle(FormatStyle::LanguageKind::LK_Cpp), 32)));
 }
 
 TEST_F(FormatTest, UnderstandsPragmaOmpTarget) {
Index: clang/lib/Format/ContinuationIndenter.cpp
===
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -1273,8 +1273,13 @@
 return ContinuationIndent;
   }
 
-  if (State.Line->InPragmaDirective)
-return CurrentState.Indent + Style.ContinuationIndentWidth;
+  // OpenMP clauses want to get additional indentation when they are pushed onto
+  // the next line.
+  if (State.Line->InPragmaDirective) {
+FormatToken *PragmaType = State.Line->First->Next->Next;
+if (PragmaType && PragmaType->TokenText.equals("omp"))
+  return CurrentState.Indent + Style.ContinuationIndentWidth;
+  }
 
   // This ensure that we correctly format ObjC methods calls without inputs,
   // i.e. where the last element isn't selector like: [callee method];
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144993: [OpenMP]Emit captured decls for target data if no devices were specified.

2023-02-28 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 accepted this revision.
jhuber6 added a comment.
This revision is now accepted and ready to land.

LG, thanks a lot for the quick fix.




Comment at: clang/lib/CodeGen/CGStmtOpenMP.cpp:7294
+  // Emit helper decls of the use_device_ptr/use_device_addr clauses.
+  for (const auto *C : S.getClausesOfKind())
+for (const Expr *E : C->varlists()) {

nit: I prefer outer braces if the inner blocks have braces.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144993/new/

https://reviews.llvm.org/D144993

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144884: [clang-format] Only add pragma continuation indentation for 'omp' clauses

2023-02-27 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 500872.
jhuber6 added a comment.

Add test for case in https://github.com/llvm/llvm-project/issues/59473


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144884/new/

https://reviews.llvm.org/D144884

Files:
  clang/lib/Format/ContinuationIndenter.cpp
  clang/unittests/Format/FormatTest.cpp


Index: clang/unittests/Format/FormatTest.cpp
===
--- clang/unittests/Format/FormatTest.cpp
+++ clang/unittests/Format/FormatTest.cpp
@@ -20560,6 +20560,21 @@
 "(including parentheses).",
 format("#pragmamark   Any non-hyphenated or hyphenated string "
"(including parentheses)."));
+
+  EXPECT_EQ("#pragma mark Any non-hyphenated or hyphenated string "
+"(including parentheses).",
+format("#pragmamark   Any non-hyphenated or hyphenated string "
+   "(including parentheses)."));
+
+  EXPECT_EQ(
+  "#pragma comment(linker,\\\n"
+  "\"argument\" \\\n"
+  "\"argument\"",
+  format("#pragma comment(linker,  \\\n"
+ " \"argument\" \\\n"
+ " \"argument\"",
+ getStyleWithColumns(
+ getChromiumStyle(FormatStyle::LanguageKind::LK_Cpp), 32)));
 }
 
 TEST_F(FormatTest, UnderstandsPragmaOmpTarget) {
Index: clang/lib/Format/ContinuationIndenter.cpp
===
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -1273,8 +1273,13 @@
 return ContinuationIndent;
   }
 
-  if (State.Line->InPragmaDirective)
-return CurrentState.Indent + Style.ContinuationIndentWidth;
+  // OpenMP clauses want to get additional indentation when they are pushed 
onto
+  // the next line.
+  if (State.Line->InPragmaDirective) {
+FormatToken *PragmaType = State.Line->First->Next->Next;
+if (PragmaType && PragmaType->TokenText.equals("omp"))
+  return CurrentState.Indent + Style.ContinuationIndentWidth;
+  }
 
   // This ensure that we correctly format ObjC methods calls without inputs,
   // i.e. where the last element isn't selector like: [callee method];


Index: clang/unittests/Format/FormatTest.cpp
===
--- clang/unittests/Format/FormatTest.cpp
+++ clang/unittests/Format/FormatTest.cpp
@@ -20560,6 +20560,21 @@
 "(including parentheses).",
 format("#pragmamark   Any non-hyphenated or hyphenated string "
"(including parentheses)."));
+
+  EXPECT_EQ("#pragma mark Any non-hyphenated or hyphenated string "
+"(including parentheses).",
+format("#pragmamark   Any non-hyphenated or hyphenated string "
+   "(including parentheses)."));
+
+  EXPECT_EQ(
+  "#pragma comment(linker,\\\n"
+  "\"argument\" \\\n"
+  "\"argument\"",
+  format("#pragma comment(linker,  \\\n"
+ " \"argument\" \\\n"
+ " \"argument\"",
+ getStyleWithColumns(
+ getChromiumStyle(FormatStyle::LanguageKind::LK_Cpp), 32)));
 }
 
 TEST_F(FormatTest, UnderstandsPragmaOmpTarget) {
Index: clang/lib/Format/ContinuationIndenter.cpp
===
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -1273,8 +1273,13 @@
 return ContinuationIndent;
   }
 
-  if (State.Line->InPragmaDirective)
-return CurrentState.Indent + Style.ContinuationIndentWidth;
+  // OpenMP clauses want to get additional indentation when they are pushed onto
+  // the next line.
+  if (State.Line->InPragmaDirective) {
+FormatToken *PragmaType = State.Line->First->Next->Next;
+if (PragmaType && PragmaType->TokenText.equals("omp"))
+  return CurrentState.Indent + Style.ContinuationIndentWidth;
+  }
 
   // This ensure that we correctly format ObjC methods calls without inputs,
   // i.e. where the last element isn't selector like: [callee method];
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


<    1   2   3   4   5   6   7   8   9   10   >