[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-06 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm closed this revision.
arsenm added a comment.

30eeb742f1d11d7a7036e3b8a3bffc1dfd252082 



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-06 Thread John McCall via Phabricator via cfe-commits
rjmccall accepted this revision.
rjmccall added a comment.
This revision is now accepted and ready to land.

Thanks, LGTM.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-05 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm updated this revision to Diff 283345.
arsenm added a comment.

Reword comment


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCUDA/kernel-args.cu
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl

Index: clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
===
--- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -67,7 +67,6 @@
 int i2;
 } struct_of_structs_arg_t;
 
-// CHECK: %union.transparent_u = type { i32 }
 typedef union
 {
   int b1;
@@ -237,7 +236,7 @@
 // CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce)
 __kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
 
-// CHECK: void @test_kernel_transparent_union_arg(%union.transparent_u %u.coerce)
+// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
 __kernel void test_kernel_transparent_union_arg(transparent_u u) { }
 
 // CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce)
Index: clang/test/CodeGenCUDA/kernel-args.cu
===
--- clang/test/CodeGenCUDA/kernel-args.cu
+++ clang/test/CodeGenCUDA/kernel-args.cu
@@ -8,14 +8,14 @@
   int a[32];
 };
 
-// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
+// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}})
 // NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x)
 __global__ void kernel(A x) {
 }
 
 class Kernel {
 public:
-  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}})
   // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x)
   static __global__ void memberKernel(A x){}
   template static __global__ void templateMemberKernel(T x) {}
@@ -29,11 +29,11 @@
 
 void test() {
   Kernel K;
-  // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}
   // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x)
   launch((void*)templateKernel);
 
-  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}
   // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x)
   launch((void*)Kernel::templateMemberKernel);
 }
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -257,6 +257,11 @@
<< " ByVal=" << getIndirectByVal()
<< " Realign=" << getIndirectRealign();
 break;
+  case IndirectAliased:
+OS << "Indirect Align=" << getIndirectAlign().getQuantity()
+   << " AadrSpace=" << getIndirectAddrSpace()
+   << " Realign=" << getIndirectRealign();
+break;
   case Expand:
 OS << "Expand";
 break;
@@ -1989,6 +1994,7 @@
   case ABIArgInfo::InAlloca:
 return true;
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;
   case ABIArgInfo::Indirect:
   case ABIArgInfo::Direct:
@@ -8790,18 +8796,31 @@
 
   // TODO: Can we omit empty structs?
 
-  llvm::Type *LTy = nullptr;
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
-LTy = CGT.ConvertType(QualType(SeltTy, 0));
+Ty = QualType(SeltTy, 0);
 
+  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
+  llvm::Type *LTy = OrigLTy;
   if (getContext().getLangOpts().HIP) {
-if (!LTy)
-  LTy = CGT.ConvertType(Ty);
 LTy = coerceKernelArgumentType(
-LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
+OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
 /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
   }
 
+  // FIXME: Should also use this for OpenCL, but it requires addressing the
+  // problem of kernels being called.
+  //
+  // FIXME: This doesn't apply the optimization of coercing pointers in structs
+  // to global address space when using byref. This would require implementing a
+  // new kind of coercion of the in-memory type when for indirect arguments.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
+  isAggregateTypeForABI(Ty)) {
+retu

[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-05 Thread John McCall via Phabricator via cfe-commits
rjmccall added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:8816
+  // FIXME: Should use byref when promoting pointers in structs, but this
+  // requires adding implementing the coercion.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&

arsenm wrote:
> rjmccall wrote:
> > arsenm wrote:
> > > rjmccall wrote:
> > > > I don't see why you'd use `byref` when promoting pointers in structs.  
> > > > Maybe it works as a hack with your backend, but it seems *extremely* 
> > > > special-case and should not be hacked into the general infrastructure.
> > > The whole point is to reinterpret the address space of the pointers in 
> > > memory since we know if it's a kernel argument it has to be an 
> > > addrspace(1) pointer or garbage. We can't infer the address space of a 
> > > generic pointer loaded from memory.
> > > 
> > > byref doesn't change that, it just makes the fact that these are passed 
> > > in memory explicit
> > `byref` is interpreted by your backend passes as an instruction that the 
> > argument value is actually the address of an object that's passed to the 
> > kernel by value, so you need to expand the memory in the kernel argument 
> > marshalling.  Why would that be something you'd want to trigger when 
> > passing a struct with a pointer in it?  You're not going to recursively 
> > copy and pass down the pointee values of those pointers.
> Because all arguments are really passed byref, we're just not at the point 
> yet where we can switch all IR arguments to use byref for all arguments. All 
> of the relevant properties are really always on the in-memory value. 
> 
> The promotion this is talking about is really orthogonal to the IR mechanism 
> used for passing kernel arguments. This promotion is because the language 
> only exposes generic pointers. In the context of a pointer inside a struct 
> passed as a kernel argument, we semantically know the address space of any 
> valid pointers must be global. You could not produce a valid generic pointer 
> from another address space here. The pointers/structs are still the same size 
> and layout, but coercing the in-memory address space is semantically more 
> useful to the optimizer
I understand that the promotion is orthogonal to the IR mechanism used for 
passing kernel arguments, which is exactly why I'm asking why there's a comment 
saying that we should "use byref when promoting pointers in struct", because I 
have no idea what that's supposed to mean when the pointer is just a part of 
the value being passed.

It sounds like what you want is to maybe customize the code that's emitted to 
copy a byref parameter into a parameter variable when the parameter type is a 
struct containing a pointer you want to promote.  But that doesn't really have 
anything to do with `byref`; if you weren't using `byref`, you'd still want a 
similar customization when creating the parameter variable.  So it seems to me 
that the comment is still off-target.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-05 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:8816
+  // FIXME: Should use byref when promoting pointers in structs, but this
+  // requires adding implementing the coercion.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&

rjmccall wrote:
> arsenm wrote:
> > rjmccall wrote:
> > > I don't see why you'd use `byref` when promoting pointers in structs.  
> > > Maybe it works as a hack with your backend, but it seems *extremely* 
> > > special-case and should not be hacked into the general infrastructure.
> > The whole point is to reinterpret the address space of the pointers in 
> > memory since we know if it's a kernel argument it has to be an addrspace(1) 
> > pointer or garbage. We can't infer the address space of a generic pointer 
> > loaded from memory.
> > 
> > byref doesn't change that, it just makes the fact that these are passed in 
> > memory explicit
> `byref` is interpreted by your backend passes as an instruction that the 
> argument value is actually the address of an object that's passed to the 
> kernel by value, so you need to expand the memory in the kernel argument 
> marshalling.  Why would that be something you'd want to trigger when passing 
> a struct with a pointer in it?  You're not going to recursively copy and pass 
> down the pointee values of those pointers.
Because all arguments are really passed byref, we're just not at the point yet 
where we can switch all IR arguments to use byref for all arguments. All of the 
relevant properties are really always on the in-memory value. 

The promotion this is talking about is really orthogonal to the IR mechanism 
used for passing kernel arguments. This promotion is because the language only 
exposes generic pointers. In the context of a pointer inside a struct passed as 
a kernel argument, we semantically know the address space of any valid pointers 
must be global. You could not produce a valid generic pointer from another 
address space here. The pointers/structs are still the same size and layout, 
but coercing the in-memory address space is semantically more useful to the 
optimizer


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-05 Thread John McCall via Phabricator via cfe-commits
rjmccall added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1997
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;

arsenm wrote:
> arsenm wrote:
> > rjmccall wrote:
> > > arsenm wrote:
> > > > rjmccall wrote:
> > > > > In principle, this can be `inreg` just as much as Indirect can.
> > > > The IR verifier currently will reject byref + inreg
> > > Why?  `inreg` is essentially orthogonal.
> > Mostly inherited from the other similar attribute handling. It can be 
> > lifted if there's a use
> Plus the name here is isArgInAlloca; this is not necessarily passed in an 
> alloca
I agree that we don't need to update this.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:8816
+  // FIXME: Should use byref when promoting pointers in structs, but this
+  // requires adding implementing the coercion.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&

arsenm wrote:
> rjmccall wrote:
> > I don't see why you'd use `byref` when promoting pointers in structs.  
> > Maybe it works as a hack with your backend, but it seems *extremely* 
> > special-case and should not be hacked into the general infrastructure.
> The whole point is to reinterpret the address space of the pointers in memory 
> since we know if it's a kernel argument it has to be an addrspace(1) pointer 
> or garbage. We can't infer the address space of a generic pointer loaded from 
> memory.
> 
> byref doesn't change that, it just makes the fact that these are passed in 
> memory explicit
`byref` is interpreted by your backend passes as an instruction that the 
argument value is actually the address of an object that's passed to the kernel 
by value, so you need to expand the memory in the kernel argument marshalling.  
Why would that be something you'd want to trigger when passing a struct with a 
pointer in it?  You're not going to recursively copy and pass down the pointee 
values of those pointers.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-05 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1997
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;

arsenm wrote:
> rjmccall wrote:
> > arsenm wrote:
> > > rjmccall wrote:
> > > > In principle, this can be `inreg` just as much as Indirect can.
> > > The IR verifier currently will reject byref + inreg
> > Why?  `inreg` is essentially orthogonal.
> Mostly inherited from the other similar attribute handling. It can be lifted 
> if there's a use
Plus the name here is isArgInAlloca; this is not necessarily passed in an alloca


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-04 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1997
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;

rjmccall wrote:
> arsenm wrote:
> > rjmccall wrote:
> > > In principle, this can be `inreg` just as much as Indirect can.
> > The IR verifier currently will reject byref + inreg
> Why?  `inreg` is essentially orthogonal.
Mostly inherited from the other similar attribute handling. It can be lifted if 
there's a use


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-08-03 Thread John McCall via Phabricator via cfe-commits
rjmccall added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1997
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;

arsenm wrote:
> rjmccall wrote:
> > In principle, this can be `inreg` just as much as Indirect can.
> The IR verifier currently will reject byref + inreg
Why?  `inreg` is essentially orthogonal.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-07-29 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm updated this revision to Diff 281629.
arsenm marked 5 inline comments as done.
arsenm added a comment.

Address comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCUDA/kernel-args.cu
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl

Index: clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
===
--- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -67,7 +67,6 @@
 int i2;
 } struct_of_structs_arg_t;
 
-// CHECK: %union.transparent_u = type { i32 }
 typedef union
 {
   int b1;
@@ -237,7 +236,7 @@
 // CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce)
 __kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
 
-// CHECK: void @test_kernel_transparent_union_arg(%union.transparent_u %u.coerce)
+// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
 __kernel void test_kernel_transparent_union_arg(transparent_u u) { }
 
 // CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce)
Index: clang/test/CodeGenCUDA/kernel-args.cu
===
--- clang/test/CodeGenCUDA/kernel-args.cu
+++ clang/test/CodeGenCUDA/kernel-args.cu
@@ -8,14 +8,14 @@
   int a[32];
 };
 
-// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
+// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}})
 // NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x)
 __global__ void kernel(A x) {
 }
 
 class Kernel {
 public:
-  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}})
   // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x)
   static __global__ void memberKernel(A x){}
   template static __global__ void templateMemberKernel(T x) {}
@@ -29,11 +29,11 @@
 
 void test() {
   Kernel K;
-  // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}
   // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x)
   launch((void*)templateKernel);
 
-  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}
   // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x)
   launch((void*)Kernel::templateMemberKernel);
 }
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -257,6 +257,11 @@
<< " ByVal=" << getIndirectByVal()
<< " Realign=" << getIndirectRealign();
 break;
+  case IndirectAliased:
+OS << "Indirect Align=" << getIndirectAlign().getQuantity()
+   << " AadrSpace=" << getIndirectAddrSpace()
+   << " Realign=" << getIndirectRealign();
+break;
   case Expand:
 OS << "Expand";
 break;
@@ -1989,6 +1994,7 @@
   case ABIArgInfo::InAlloca:
 return true;
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;
   case ABIArgInfo::Indirect:
   case ABIArgInfo::Direct:
@@ -8792,18 +8798,30 @@
 
   // TODO: Can we omit empty structs?
 
-  llvm::Type *LTy = nullptr;
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
-LTy = CGT.ConvertType(QualType(SeltTy, 0));
+Ty = QualType(SeltTy, 0);
 
+  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
+  llvm::Type *LTy = OrigLTy;
   if (getContext().getLangOpts().HIP) {
-if (!LTy)
-  LTy = CGT.ConvertType(Ty);
 LTy = coerceKernelArgumentType(
-LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
+OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
 /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
   }
 
+  // FIXME: Should also use this for OpenCL, but it requires addressing the
+  // problem of kernels being called.
+  //
+  // FIXME: Should use byref when promoting pointers in structs, but this
+  // requires adding implementing the coercion.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
+  isAggregateTypeForABI(Ty)) {
+return ABIArgInfo::getIndirectAliased(
+getContext().getTypeAlignInCha

[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-07-29 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1997
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;

rjmccall wrote:
> In principle, this can be `inreg` just as much as Indirect can.
The IR verifier currently will reject byref + inreg



Comment at: clang/lib/CodeGen/TargetInfo.cpp:8816
+  // FIXME: Should use byref when promoting pointers in structs, but this
+  // requires adding implementing the coercion.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&

rjmccall wrote:
> I don't see why you'd use `byref` when promoting pointers in structs.  Maybe 
> it works as a hack with your backend, but it seems *extremely* special-case 
> and should not be hacked into the general infrastructure.
The whole point is to reinterpret the address space of the pointers in memory 
since we know if it's a kernel argument it has to be an addrspace(1) pointer or 
garbage. We can't infer the address space of a generic pointer loaded from 
memory.

byref doesn't change that, it just makes the fact that these are passed in 
memory explicit



Comment at: clang/lib/CodeGen/TargetInfo.cpp:9383
   case ABIArgInfo::InAlloca:
+  case ABIArgInfo::IndirectAliased:
 llvm_unreachable("Unsupported ABI kind for va_arg");

rjmccall wrote:
> No reason not to use the Indirect code here.
I generally don't like speculatively adding handling for features I can't write 
a testcase for, but I've moved these


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-07-24 Thread John McCall via Phabricator via cfe-commits
rjmccall added inline comments.



Comment at: clang/include/clang/CodeGen/CGFunctionInfo.h:52
+/// IndirectAliased - Similar to Indirect, but the pointer may not be
+/// writable.
+IndirectAliased,

Hmm.  I guess there's actually two different potential conventions here:

- The caller can provide the address of a known-immutable object that has the 
right value, and the callee has to copy it if it needs the object to have a 
unique address or wants to mutate it.

- The caller can provide the address of *any* object that has the right value, 
and the callee has to copy it if it needs the object to have a unique address, 
wants to mutate it, or needs the value to stick around across call boundaries.

The advantage of the second is that IRGen could avoid some copies on the caller 
side, which could be advantageous when the callee is sufficiently trivial.  The 
disadvantage is that the callee would have to defensively copy in more 
situations.

Probably we should use the former.  Please be explicit about it, though:

  Similar to Indirect, but the pointer may be to an object that is otherwise
  referenced.  The object is known to not be modified through any other
  references for the duration of the call, and the callee must not itself
  modify the object.  Because C allows parameter variables to be modified
  and guarantees that they have unique addresses, the callee must
  defensively copy the object into a local variable if it might be modified or
  its address might be compared.  Since those are uncommon, in principle
  this convention allows programs to avoid copies in more situations.
  However, it may introduce *extra* copies if the callee fails to prove that
  a copy is unnecessary and the caller naturally produces an unaliased
  object for the argument.



Comment at: clang/lib/CodeGen/CGCall.cpp:2213
 Attrs.addAlignmentAttr(Align.getQuantity());
 
   // byval disables readnone and readonly.

Please add a TODO here that we could add the `byref` attribute if we're willing 
to update the test cases.  Maybe whoever does that can add alignments at the 
same time.



Comment at: clang/lib/CodeGen/CGCall.cpp:2462
+// may be aliased, copy it since the incoming argument may not be
+// mutable.
 Address V = ParamAddr;

"copy it to ensure that the parameter variable is mutable and has a unique 
address, as C requires".

I've wanted Sema to track whether local variables are mutated or have their 
address taken for a long time; maybe someday we can do that and then take 
advantage of it here.  Just a random thought, sorry.



Comment at: clang/lib/CodeGen/CGCall.cpp:4696
+case ABIArgInfo::IndirectAliased:
+  // This should be similar to Indirect, but no valid use case right now.
+  llvm_unreachable("Call arguments not implemented for IndirectAliased");

Please just make this use the Indirect code.  If we gave it special attention, 
we could optimize it better, but conservatively doing what Indirect does should 
still work.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1997
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;

In principle, this can be `inreg` just as much as Indirect can.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:8816
+  // FIXME: Should use byref when promoting pointers in structs, but this
+  // requires adding implementing the coercion.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&

I don't see why you'd use `byref` when promoting pointers in structs.  Maybe it 
works as a hack with your backend, but it seems *extremely* special-case and 
should not be hacked into the general infrastructure.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:9383
   case ABIArgInfo::InAlloca:
+  case ABIArgInfo::IndirectAliased:
 llvm_unreachable("Unsupported ABI kind for va_arg");

No reason not to use the Indirect code here.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:9754
+  case ABIArgInfo::IndirectAliased:
 llvm_unreachable("Unsupported ABI kind for va_arg");
   case ABIArgInfo::Ignore:

Same.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-07-24 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm updated this revision to Diff 280607.
arsenm added a comment.

Use distinct ABIArgInfo::Kind. Also don't enable this for OpenCL yet, since 
that requires fixing the callable kernel workaround


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCUDA/kernel-args.cu
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl

Index: clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
===
--- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -67,7 +67,6 @@
 int i2;
 } struct_of_structs_arg_t;
 
-// CHECK: %union.transparent_u = type { i32 }
 typedef union
 {
   int b1;
@@ -237,7 +236,7 @@
 // CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce)
 __kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
 
-// CHECK: void @test_kernel_transparent_union_arg(%union.transparent_u %u.coerce)
+// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
 __kernel void test_kernel_transparent_union_arg(transparent_u u) { }
 
 // CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce)
Index: clang/test/CodeGenCUDA/kernel-args.cu
===
--- clang/test/CodeGenCUDA/kernel-args.cu
+++ clang/test/CodeGenCUDA/kernel-args.cu
@@ -8,14 +8,14 @@
   int a[32];
 };
 
-// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
+// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}})
 // NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x)
 __global__ void kernel(A x) {
 }
 
 class Kernel {
 public:
-  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}})
   // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x)
   static __global__ void memberKernel(A x){}
   template static __global__ void templateMemberKernel(T x) {}
@@ -29,11 +29,11 @@
 
 void test() {
   Kernel K;
-  // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}
   // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x)
   launch((void*)templateKernel);
 
-  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
+  // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}
   // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x)
   launch((void*)Kernel::templateMemberKernel);
 }
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -257,6 +257,11 @@
<< " ByVal=" << getIndirectByVal()
<< " Realign=" << getIndirectRealign();
 break;
+  case IndirectAliased:
+OS << "Indirect Align=" << getIndirectAlign().getQuantity()
+   << " AadrSpace=" << getIndirectAddrSpace()
+   << " Realign=" << getIndirectRealign();
+break;
   case Expand:
 OS << "Expand";
 break;
@@ -1989,6 +1994,7 @@
   case ABIArgInfo::InAlloca:
 return true;
   case ABIArgInfo::Ignore:
+  case ABIArgInfo::IndirectAliased:
 return false;
   case ABIArgInfo::Indirect:
   case ABIArgInfo::Direct:
@@ -8792,18 +8798,30 @@
 
   // TODO: Can we omit empty structs?
 
-  llvm::Type *LTy = nullptr;
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
-LTy = CGT.ConvertType(QualType(SeltTy, 0));
+Ty = QualType(SeltTy, 0);
 
+  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
+  llvm::Type *LTy = OrigLTy;
   if (getContext().getLangOpts().HIP) {
-if (!LTy)
-  LTy = CGT.ConvertType(Ty);
 LTy = coerceKernelArgumentType(
-LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
+OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
 /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
   }
 
+  // FIXME: Should also use this for OpenCL, but it requires addressing the
+  // problem of kernels being called.
+  //
+  // FIXME: Should use byref when promoting pointers in structs, but this
+  // requires adding implementing the coercion.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
+  isAggregateTypeForABI(Ty)) {
+return 

[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-07-23 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added a comment.

In D79744#2165929 , @rjmccall wrote:

> Arguably we should add this attribute to all indirect arguments.  I can 
> understand not wanting to update all the test cases, but you could probably 
> avoid adding a new IndirectByRef kind of ABIArgInfo by treating kernels 
> specially in ConstructAttributeList.
>
> Or, sorry, I forget — is this semantically necessary because the argument is 
> to constant memory and the callee has to copy it to form the mutable local?  
> If so, I think (1) the above statement about theoretically using `byref` on 
> all arguments still applies and (2) we do need a new ABIArgInfo kind, but we 
> should name it something like IndirectAliased.


Yes, it's semantically needed to insert the copy from constant memory. I 
originally interpreted a copy as necessary if the indirect addrspace did not 
match the stack address space, which is a sort of roundabout way of achieving 
the same thing


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79744: clang: Use byref for aggregate kernel arguments

2020-07-21 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

Arguably we should add this attribute to all indirect arguments.  I can 
understand not wanting to update all the test cases, but you could probably 
avoid adding a new IndirectByRef kind of ABIArgInfo by treating kernels 
specially in ConstructAttributeList.

Or, sorry, I forget — is this semantically necessary because the argument is to 
constant memory and the callee has to copy it to form the mutable local?  If 
so, I think (1) the above statement about theoretically using `byref` on all 
arguments still applies and (2) we do need a new ABIArgInfo kind, but we should 
name it something like IndirectAliased.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79744/new/

https://reviews.llvm.org/D79744



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits