[PATCH] D86376: [HIP] Emit kernel symbol

2021-03-01 Thread Yaxun Liu via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5cf2a37f1255: [HIP] Emit kernel symbol (authored by yaxunl).
Herald added a project: clang.

Changed prior to commit:
  https://reviews.llvm.org/D86376?vs=322894=327268#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CGCUDARuntime.h
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/Inputs/cuda.h
  clang/test/CodeGenCUDA/cxx-call-kernel.cpp
  clang/test/CodeGenCUDA/kernel-dbg-info.cu
  clang/test/CodeGenCUDA/kernel-stub-name.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -54,7 +54,7 @@
   [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
 // MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
 // MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -2,10 +2,17 @@
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN: -fcuda-include-gpubinary %t -o - -x hip\
-// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK
+// RUN:   | FileCheck %s
 
 #include "Inputs/cuda.h"
 
+// Kernel handles
+
+// CHECK: @[[HCKERN:ckernel]] = constant void ()* @__device_stub__ckernel, align 8
+// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @_ZN2ns23__device_stub__nskernelEv, align 8
+// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @_Z25__device_stub__kernelfuncIiEvv, align 8
+// CHECK: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8
+
 extern "C" __global__ void ckernel() {}
 
 namespace ns {
@@ -17,6 +24,11 @@
 
 __global__ void kernel_decl();
 
+void (*kernel_ptr)();
+void *void_ptr;
+
+void launch(void *kern);
+
 // Device side kernel names
 
 // CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
@@ -26,16 +38,20 @@
 // Non-template kernel stub functions
 
 // CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
 // CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
+
 
-// CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
+// Check kernel stub is used for triple chevron
+
+// CHECK-LABEL: define{{.*}}@_Z4fun1v()
 // CHECK: call void @[[CSTUB]]()
 // CHECK: call void @[[NSSTUB]]()
 // CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]()
 // CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
-void hostfunc(void) {
+
+void fun1(void) {
   ckernel<<<1, 1>>>();
   ns::nskernel<<<1, 1>>>();
   kernelfunc<<<1, 1>>>();
@@ -45,11 +61,69 @@
 // Template kernel stub functions
 
 // CHECK: define{{.*}}@[[TSTUB]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
+
+// Check declaration of stub function for external kernel.
 
 // CHECK: declare{{.*}}@[[DSTUB]]
 
+// Check kernel handle is used for passing the kernel as a function pointer
+
+// CHECK-LABEL: define{{.*}}@_Z4fun2v()
+// CHECK: call void @_Z6launchPv({{.*}}[[HCKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HNSKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HTKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HDKERN]]
+void fun2() {
+  launch((void *)ckernel);
+  launch((void *)ns::nskernel);
+  launch((void *)kernelfunc);
+  launch((void *)kernel_decl);
+}
+
+// Check kernel handle is used for assigning a kernel to a function pointer
+
+// CHECK-LABEL: define{{.*}}@_Z4fun3v()
+// CHECK:  store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
+// CHECK:  store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
+// CHECK:  store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** 

[PATCH] D86376: [HIP] Emit kernel symbol

2021-03-01 Thread Artem Belevich via Phabricator via cfe-commits
tra accepted this revision.
tra added a comment.
This revision is now accepted and ready to land.

So, to summarize how the patch changes the under-the-hood kernel launch 
machinery:

- device-side is unchanged. Kernel function is generated with the real kernel 
name
- host-side stub is still generated with the `__device_stub` prefix.
- host-side generates a 'handle' variable with the kernel function name, which 
is a pointer to the stub.
- host-side registers the `handle variable` -> `device-side kernel name` 
association with the HIP runtime.
- the address of the handle variable is used everywhere where we need a kernel 
pointer on the host side. I.e. passing kernel pointers around, referring to 
kernels across TUs, etc.
- `<<<>>>` becomes an indirect call to a `__device_stub` function using the 
pointer retrieved from the handle.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86376: [HIP] Emit kernel symbol

2021-02-25 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

ping


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86376: [HIP] Emit kernel symbol

2021-02-10 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 322894.
yaxunl added a comment.

handle launch kernel by API and launch kernel in function pointer.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CGCUDARuntime.h
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/Inputs/cuda.h
  clang/test/CodeGenCUDA/cxx-call-kernel.cpp
  clang/test/CodeGenCUDA/kernel-dbg-info.cu
  clang/test/CodeGenCUDA/kernel-stub-name.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -54,7 +54,7 @@
   [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
 // MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
 // MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -2,10 +2,17 @@
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN: -fcuda-include-gpubinary %t -o - -x hip\
-// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK
+// RUN:   | FileCheck %s
 
 #include "Inputs/cuda.h"
 
+// Kernel handles
+
+// CHECK: @[[HCKERN:ckernel]] = constant void ()* @__device_stub__ckernel, align 8
+// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @_ZN2ns23__device_stub__nskernelEv, align 8
+// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @_Z25__device_stub__kernelfuncIiEvv, align 8
+// CHECK: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8
+
 extern "C" __global__ void ckernel() {}
 
 namespace ns {
@@ -17,6 +24,11 @@
 
 __global__ void kernel_decl();
 
+void (*kernel_ptr)();
+void *void_ptr;
+
+void launch(void *kern);
+
 // Device side kernel names
 
 // CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
@@ -26,16 +38,20 @@
 // Non-template kernel stub functions
 
 // CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
 // CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
+
 
-// CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
+// Check kernel stub is used for triple chevron
+
+// CHECK-LABEL: define{{.*}}@_Z4fun1v()
 // CHECK: call void @[[CSTUB]]()
 // CHECK: call void @[[NSSTUB]]()
 // CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]()
 // CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
-void hostfunc(void) {
+
+void fun1(void) {
   ckernel<<<1, 1>>>();
   ns::nskernel<<<1, 1>>>();
   kernelfunc<<<1, 1>>>();
@@ -45,11 +61,69 @@
 // Template kernel stub functions
 
 // CHECK: define{{.*}}@[[TSTUB]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
+
+// Check declaration of stub function for external kernel.
 
 // CHECK: declare{{.*}}@[[DSTUB]]
 
+// Check kernel handle is used for passing the kernel as a function pointer
+
+// CHECK-LABEL: define{{.*}}@_Z4fun2v()
+// CHECK: call void @_Z6launchPv({{.*}}[[HCKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HNSKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HTKERN]]
+// CHECK: call void @_Z6launchPv({{.*}}[[HDKERN]]
+void fun2() {
+  launch((void *)ckernel);
+  launch((void *)ns::nskernel);
+  launch((void *)kernelfunc);
+  launch((void *)kernel_decl);
+}
+
+// Check kernel handle is used for assigning a kernel to a function pointer
+
+// CHECK-LABEL: define{{.*}}@_Z4fun3v()
+// CHECK:  store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
+// CHECK:  store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
+// CHECK:  store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
+// CHECK:  store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
+void fun3() {
+  kernel_ptr = ckernel;
+  kernel_ptr = 
+  

[PATCH] D86376: [HIP] Emit kernel symbol

2021-02-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D86376#2552524 , @tra wrote:

> In D86376#2552419 , @yaxunl wrote:
>
>> For triple chevron with kernel name, it is not needed. We only need 
>> indirection for a triple chevron with a function pointer, in which case we 
>> do not know its stub function at compile time. This is allowed by CUDA/HIP.
>
> Got it. We'll need to map the address of the symbol into the address of the 
> stub.
>
> Adding an indirection brings another question -- what's supposed to happen if 
> we're passed a pointer that's *not* a pointer to the symbol. I.e. it does not 
> point to the pointer to the stub.

The same thing could happen before this change, i.e., a function pointer does 
not contain the address of a stub function. In either case it will be UB. This 
change does not make the situation worse.

> Can we backtrack a bit and review our constraints/assumptions. I vaguely 
> recall AMD inproduced `__device_stub` because debugger needed to distinguish 
> host-side stub from the device-side kernel.
> If we add the data with the same name, would not it cause the same confusion 
> about what `kernel` is? If we are allowed to use 'kernel' on the host, is 
> there a reason not to rename `__device_stubkernel` back to `kernel` and just 
> use the stub address everywhere?

We have confirmed with our debugger team that emitting this symbol is OK for 
rocgdb since it is a variable symbol, not a function symbol.

> Another question -- assuming that the stub can't be renamed, can we give the 
> stub an alias with the name `kernel`? This way no matter how we take the 
> address, it will always point to the stub.

We have tried this and it did not work. The alias will ends up as a symbol to a 
function which is not allowed by rocgdb.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86376: [HIP] Emit kernel symbol

2021-02-09 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

In D86376#2552419 , @yaxunl wrote:

> For triple chevron with kernel name, it is not needed. We only need 
> indirection for a triple chevron with a function pointer, in which case we do 
> not know its stub function at compile time. This is allowed by CUDA/HIP.

Got it. We'll need to map the address of the symbol into the address of the 
stub.

Adding an indirection brings another question -- what's supposed to happen if 
we're passed a pointer that's *not* a pointer to the symbol. I.e. it does not 
point to the pointer to the stub.

Can we backtrack a bit and review our constraints/assumptions. I vaguely recall 
AMD inproduced `__device_stub` because debugger needed to distinguish host-side 
stub from the device-side kernel.
If we add the data with the same name, would not it cause the same confusion 
about what `kernel` is? If we are allowed to use 'kernel' on the host, is there 
a reason not to rename `__device_stubkernel` back to `kernel` and just use the 
stub address everywhere?

Another question -- assuming that the stub can't be renamed, can we give the 
stub an alias with the name `kernel`? This way no matter how we take the 
address, it will always point to the stub.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86376: [HIP] Emit kernel symbol

2021-02-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In D86376#2552066 , @tra wrote:

> In D86376#2551298 , @yaxunl wrote:
>
>> Actually there is one issue with this approach.
>>
>> HIP have API's to launch kernels, which accept kernel as function pointer 
>> argument. Currently when taking address of kernel, we get the stub function. 
>> These kernel launching API's will not work if we use kernel symbol to 
>> register the kernel. A solution is to return the kernel symbol instead of 
>> stub function when taking address of the kernel in host compilation, i.e. if 
>> a function pointer is assigned to a kernel in host code, it gets the kernel 
>> symbol instead of the stub function. This will make the kernel launching API 
>> work.
>>
>> To keep the triple chevron working, the kernel symbol will be initialized 
>> with the address of the stub function. For triple chevron call, the address 
>> of the stub function is loaded from the kernel symbol and invoked.
>
> This could work.
> Do we really need an indirection? If we know the stub address when we 
> initialize the symbol with it, we should be able to use that address for 
> `<<<>>>`.

For triple chevron with kernel name, it is not needed. We only need indirection 
for a triple chevron with a function pointer, in which case we do not know its 
stub function at compile time. This is allowed by CUDA/HIP.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86376: [HIP] Emit kernel symbol

2021-02-09 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

In D86376#2551298 , @yaxunl wrote:

> Actually there is one issue with this approach.
>
> HIP have API's to launch kernels, which accept kernel as function pointer 
> argument. Currently when taking address of kernel, we get the stub function. 
> These kernel launching API's will not work if we use kernel symbol to 
> register the kernel. A solution is to return the kernel symbol instead of 
> stub function when taking address of the kernel in host compilation, i.e. if 
> a function pointer is assigned to a kernel in host code, it gets the kernel 
> symbol instead of the stub function. This will make the kernel launching API 
> work.
>
> To keep the triple chevron working, the kernel symbol will be initialized 
> with the address of the stub function. For triple chevron call, the address 
> of the stub function is loaded from the kernel symbol and invoked.

This could work.
Do we really need an indirection? If we know the stub address when we 
initialize the symbol with it, we should be able to use that address for 
`<<<>>>`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86376: [HIP] Emit kernel symbol

2021-02-09 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

Actually there is one issue with this approach.

HIP have API's to launch kernels, which accept kernel as function pointer 
argument. Currently when taking address of kernel, we get the stub function. 
These kernel launching API's will not work if we use kernel symbol to register 
the kernel. A solution is to return the kernel symbol instead of stub function 
when taking address of the kernel in host compilation, i.e. if a function 
pointer is assigned to a kernel in host code, it gets the kernel symbol instead 
of the stub function. This will make the kernel launching API work.

To keep the triple chevron working, the kernel symbol will be initialized with 
the address of the stub function. For triple chevron call, the address of the 
stub function is loaded from the kernel symbol and invoked.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86376: [HIP] Emit kernel symbol

2021-02-08 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 322213.
yaxunl retitled this revision from "[HIP] Simplify kernel launching" to "[HIP] 
Emit kernel symbol".
yaxunl edited the summary of this revision.
yaxunl added a comment.

Revised by Artem's comments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86376/new/

https://reviews.llvm.org/D86376

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/test/CodeGenCUDA/Inputs/cuda.h
  clang/test/CodeGenCUDA/cxx-call-kernel.cpp
  clang/test/CodeGenCUDA/kernel-dbg-info.cu
  clang/test/CodeGenCUDA/kernel-stub-name.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -54,7 +54,7 @@
   [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
 // MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
 // MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -6,6 +6,12 @@
 
 #include "Inputs/cuda.h"
 
+// Kernel handles
+
+// CHECK: @[[HCKERN:ckernel]] = constant i8* null
+// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant i8* null
+// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant i8* null
+
 extern "C" __global__ void ckernel() {}
 
 namespace ns {
@@ -26,9 +32,9 @@
 // Non-template kernel stub functions
 
 // CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
 // CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
 
 // CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
 // CHECK: call void @[[CSTUB]]()
@@ -45,11 +51,11 @@
 // Template kernel stub functions
 
 // CHECK: define{{.*}}@[[TSTUB]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
 
 // CHECK: declare{{.*}}@[[DSTUB]]
 
 // CHECK-LABEL: define{{.*}}@__hip_register_globals
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]]
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]]
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
Index: clang/test/CodeGenCUDA/kernel-dbg-info.cu
===
--- clang/test/CodeGenCUDA/kernel-dbg-info.cu
+++ clang/test/CodeGenCUDA/kernel-dbg-info.cu
@@ -30,6 +30,9 @@
   *a = 1;
 }
 
+// Kernel symbol for launching kernel.
+// CHECK: @[[SYM:ckernel]] = constant i8* null
+
 // Device side kernel names
 // CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
 
@@ -40,7 +43,7 @@
 // Make sure there is no !dbg between function attributes and '{'
 // CHECK: define{{.*}} void @[[CSTUB:__device_stub__ckernel]]{{.*}} #{{[0-9]+}} {
 // CHECK-NOT: call {{.*}}@hipLaunchByPtr{{.*}}!dbg
-// CHECK: call {{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call {{.*}}@hipLaunchByPtr{{.*}}@[[SYM]]
 // CHECK-NOT: ret {{.*}}!dbg
 
 // CHECK-LABEL: define {{.*}}@_Z8hostfuncPi{{.*}}!dbg
Index: clang/test/CodeGenCUDA/cxx-call-kernel.cpp
===
--- /dev/null
+++ clang/test/CodeGenCUDA/cxx-call-kernel.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -x hip -emit-llvm-bc %s -o %t.hip.bc
+// RUN: %clang_cc1 -mlink-bitcode-file %t.hip.bc -DHIP_PLATFORM -emit-llvm \
+// RUN:   %s -o - | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: @_Z2g1i = constant i8* null
+#if __HIP__
+__global__ void g1(int x) {}
+#else
+extern void g1(int x);
+
+// CHECK: call i32 @hipLaunchKernel{{.*}}@_Z2g1i
+void test() {
+  hipLaunchKernel((void*)g1, 1, 1, nullptr, 0, 0);
+}
+
+// CHECK: __hipRegisterFunction{{.*}}@_Z2g1i
+#endif
Index: clang/test/CodeGenCUDA/Inputs/cuda.h