yaxunl updated this revision to Diff 231966.
yaxunl added a comment.
Herald added subscribers: nhaehnle, jvesely.

clean up and fix assertions.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68578/new/

https://reviews.llvm.org/D68578

Files:
  clang/include/clang/Basic/Specifiers.h
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftMangle.cpp
  clang/lib/AST/Type.cpp
  clang/lib/AST/TypePrinter.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CGCUDARuntime.h
  clang/lib/CodeGen/CGDebugInfo.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
  clang/test/CodeGenCUDA/kernel-stub-name.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===================================================================
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -36,4 +36,4 @@
   }(p);
 }
 // HOST: @__hip_register_globals
-// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -6,15 +6,50 @@
 
 #include "Inputs/cuda.h"
 
+extern "C" __global__ void ckernel() {}
+
+namespace ns {
+__global__ void nskernel() {}
+} // namespace ns
+
 template<class T>
 __global__ void kernelfunc() {}
 
+__global__ void kernel_decl();
+
+// Device side kernel names
+
+// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
+// CHECK: @[[NSKERN:[0-9]*]] = {{.*}} c"_ZN2ns8nskernelEv\00"
+// CHECK: @[[TKERN:[0-9]*]] = {{.*}} c"_Z10kernelfuncIiEvv\00"
+
+// Non-template kernel stub functions
+
+// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
+
 // CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
-// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]()
-void hostfunc(void) { kernelfunc<int><<<1, 1>>>(); }
+// CHECK: call void @[[CSTUB]]()
+// CHECK: call void @[[NSSTUB]]()
+// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]()
+// CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
+void hostfunc(void) {
+  ckernel<<<1, 1>>>();
+  ns::nskernel<<<1, 1>>>();
+  kernelfunc<int><<<1, 1>>>();
+  kernel_decl<<<1, 1>>>();
+}
+
+// Template kernel stub functions
+
+// CHECK: define{{.*}}@[[TSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
 
-// CHECK: define{{.*}}@[[STUB]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]]
+// CHECK: declare{{.*}}@[[DSTUB]]
 
 // CHECK-LABEL: define{{.*}}@__hip_register_globals
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]
Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -13,19 +13,19 @@
 // HOST-NOT: %struct.T.coerce
 
 // CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
-// HOST: define void @_Z7kernel1Pi.stub(i32* %x)
+// HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x)
 __global__ void kernel1(int *x) {
   x[0]++;
 }
 
 // CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
-// HOST: define void @_Z7kernel2Ri.stub(i32* dereferenceable(4) %x)
+// HOST: define void @_Z22__device_stub__kernel2Ri(i32* dereferenceable(4) %x)
 __global__ void kernel2(int &x) {
   x++;
 }
 
 // CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
-// HOST: define void @_Z7kernel3PU3AS2iPU3AS1i.stub(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+// HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
 __global__ void kernel3(__attribute__((address_space(2))) int *x,
                         __attribute__((address_space(1))) int *y) {
   y[0] = x[0];
@@ -43,7 +43,7 @@
 // `by-val` struct will be coerced into a similar struct with all generic
 // pointers lowerd into global ones.
 // CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
-// HOST: define void @_Z7kernel41S.stub(i32* %s.coerce0, float* %s.coerce1)
+// HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
 __global__ void kernel4(struct S s) {
   s.x[0]++;
   s.y[0] += 1.f;
@@ -51,7 +51,7 @@
 
 // If a pointer to struct is passed, only the pointer itself is coerced into the global one.
 // CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
-// HOST: define void @_Z7kernel5P1S.stub(%struct.S* %s)
+// HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s)
 __global__ void kernel5(struct S *s) {
   s->x[0]++;
   s->y[0] += 1.f;
@@ -62,7 +62,7 @@
 };
 // `by-val` array is also coerced.
 // CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
-// HOST: define void @_Z7kernel61T.stub(float* %t.coerce0, float* %t.coerce1)
+// HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
 __global__ void kernel6(struct T t) {
   t.x[0][0] += 1.f;
   t.x[1][0] += 2.f;
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1004,6 +1004,10 @@
         FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
       llvm::raw_svector_ostream Out(Buffer);
       Out << "__regcall3__" << II->getName();
+    } else if (FD && FD->getType()->castAs<FunctionType>()->getCallConv() ==
+                         CC_DeviceStub) {
+      llvm::raw_svector_ostream Out(Buffer);
+      Out << "__device_stub__" << II->getName();
     } else {
       Out << II->getName();
     }
@@ -1089,13 +1093,40 @@
 
   // Keep the first result in the case of a mangling collision.
   const auto *ND = cast<NamedDecl>(GD.getDecl());
-  std::string MangledName = getMangledNameImpl(*this, GD, ND);
-
-  // Adjust kernel stub mangling as we may need to be able to differentiate
-  // them from the kernel itself (e.g., for HIP).
-  if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
-    if (!getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>())
-      MangledName = getCUDARuntime().getDeviceStubName(MangledName);
+  std::string MangledName;
+
+  // ToDo: HIP needs unmangled name of device stub to be prefixed with
+  // __device_stub__. This is done by adding device stub calling convention
+  // to the stub function or its template temporarily then let the mangler
+  // mangle it differently. We cannot let the stub function have device stub
+  // calling convention in AST permanently because: 1. we still need to use
+  // the same FunctionDecl to mangle the kernel name which should not have
+  // the prefix; 2. the stub function does not really have a specific calling
+  // convention which is enforced by type checking.
+  // Need a better way to mangle the device stub function.
+  auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
+  if (getLangOpts().HIP && FD &&
+      getContext().getTargetInfo().getCXXABI() != TargetCXXABI::Microsoft &&
+      !getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>()) {
+
+    // Make sure the non-prefixed stub name is the same as device side kernel
+    // name.
+    assert(getCUDARuntime().getDeviceSideName(FD) ==
+               getMangledNameImpl(*this, GD, FD) &&
+           "Mismatch between host side and device side kernel name");
+
+    if (auto *TD = cast<FunctionDecl>(FD)->getPrimaryTemplate())
+      FD = TD->getTemplatedDecl();
+    auto OldQT = FD->getType();
+    auto *OldFT = OldQT->getAs<FunctionType>();
+    auto *NewFT = getContext().adjustFunctionType(
+        OldFT, OldFT->getExtInfo().withCallingConv(CC_DeviceStub));
+    const_cast<FunctionDecl *>(FD)->setType(QualType(NewFT, 0));
+    MangledName = getMangledNameImpl(*this, GD, ND);
+    assert(MangledName.find("__device_stub__") != StringRef::npos);
+    const_cast<FunctionDecl *>(FD)->setType(OldQT);
+  } else
+    MangledName = getMangledNameImpl(*this, GD, ND);
 
   auto Result = Manglings.insert(std::make_pair(MangledName, GD));
   return MangledDeclNames[CanonicalGD] = Result.first->first();
Index: clang/lib/CodeGen/CGDebugInfo.cpp
===================================================================
--- clang/lib/CodeGen/CGDebugInfo.cpp
+++ clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1149,6 +1149,7 @@
 
 static unsigned getDwarfCC(CallingConv CC) {
   switch (CC) {
+  case CC_DeviceStub:
   case CC_C:
     // Avoid emitting DW_AT_calling_convention if the C convention was used.
     return 0;
Index: clang/lib/CodeGen/CGCUDARuntime.h
===================================================================
--- clang/lib/CodeGen/CGCUDARuntime.h
+++ clang/lib/CodeGen/CGCUDARuntime.h
@@ -25,6 +25,7 @@
 namespace clang {
 
 class CUDAKernelCallExpr;
+class Decl;
 class VarDecl;
 
 namespace CodeGen {
@@ -66,8 +67,8 @@
   /// Must be called after ModuleCtorFunction
   virtual llvm::Function *makeModuleDtorFunction() = 0;
 
-  /// Construct and return the stub name of a kernel.
-  virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0;
+  // Returns device side kernel name.
+  virtual std::string getDeviceSideName(const Decl *ND) = 0;
 };
 
 /// Creates an instance of a CUDA runtime class.
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -117,7 +117,6 @@
 
   void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
   void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
-  std::string getDeviceSideName(const Decl *ND);
 
 public:
   CGNVCUDARuntime(CodeGenModule &CGM);
@@ -132,8 +131,7 @@
   llvm::Function *makeModuleCtorFunction() override;
   /// Creates module destructor function
   llvm::Function *makeModuleDtorFunction() override;
-  /// Construct and return the stub name of a kernel.
-  std::string getDeviceStubName(llvm::StringRef Name) const override;
+  std::string getDeviceSideName(const Decl *ND) override;
 };
 
 }
@@ -231,8 +229,8 @@
   assert((CGF.CGM.getContext().getAuxTargetInfo() &&
           (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
            CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
-         getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
-             CGF.CurFn->getName());
+         CGF.getLangOpts().HIP ||
+         getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName());
 
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
@@ -797,12 +795,6 @@
   return ModuleDtorFunc;
 }
 
-std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const {
-  if (!CGM.getLangOpts().HIP)
-    return Name;
-  return (Name + ".stub").str();
-}
-
 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
   return new CGNVCUDARuntime(CGM);
 }
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -313,6 +313,7 @@
     case CC_X86Pascal:
     case CC_IntelOclBicc:
     case CC_OpenCLKernel:
+    case CC_DeviceStub:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -659,6 +660,7 @@
     case CC_PreserveAll:
     case CC_X86RegCall:
     case CC_OpenCLKernel:
+    case CC_DeviceStub:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -733,6 +735,7 @@
     case CC_Swift:
     case CC_X86RegCall:
     case CC_OpenCLKernel:
+    case CC_DeviceStub:
       return CCCR_OK;
     default:
       return CCCR_Warning;
Index: clang/lib/AST/TypePrinter.cpp
===================================================================
--- clang/lib/AST/TypePrinter.cpp
+++ clang/lib/AST/TypePrinter.cpp
@@ -893,6 +893,7 @@
       break;
     case CC_SpirFunction:
     case CC_OpenCLKernel:
+    case CC_DeviceStub:
       // Do nothing. These CCs are not available as attributes.
       break;
     case CC_Swift:
Index: clang/lib/AST/Type.cpp
===================================================================
--- clang/lib/AST/Type.cpp
+++ clang/lib/AST/Type.cpp
@@ -2947,6 +2947,8 @@
   case CC_Swift: return "swiftcall";
   case CC_PreserveMost: return "preserve_most";
   case CC_PreserveAll: return "preserve_all";
+  case CC_DeviceStub:
+    return "device_stub";
   }
 
   llvm_unreachable("Invalid calling convention.");
Index: clang/lib/AST/MicrosoftMangle.cpp
===================================================================
--- clang/lib/AST/MicrosoftMangle.cpp
+++ clang/lib/AST/MicrosoftMangle.cpp
@@ -2393,6 +2393,7 @@
       llvm_unreachable("Unsupported CC for mangling");
     case CC_Win64:
     case CC_X86_64SysV:
+    case CC_DeviceStub:
     case CC_C: Out << 'A'; break;
     case CC_X86Pascal: Out << 'C'; break;
     case CC_X86ThisCall: Out << 'E'; break;
Index: clang/lib/AST/ItaniumMangle.cpp
===================================================================
--- clang/lib/AST/ItaniumMangle.cpp
+++ clang/lib/AST/ItaniumMangle.cpp
@@ -483,6 +483,7 @@
                                   const AbiTagList *AdditionalAbiTags);
   void mangleSourceName(const IdentifierInfo *II);
   void mangleRegCallName(const IdentifierInfo *II);
+  void mangleDeviceStubName(const IdentifierInfo *II);
   void mangleSourceNameWithAbiTags(
       const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
   void mangleLocalName(const Decl *D,
@@ -1302,7 +1303,12 @@
       bool IsRegCall = FD &&
                        FD->getType()->castAs<FunctionType>()->getCallConv() ==
                            clang::CC_X86RegCall;
-      if (IsRegCall)
+      bool IsDeviceStub =
+          FD && FD->getType()->castAs<FunctionType>()->getCallConv() ==
+                    clang::CC_DeviceStub;
+      if (IsDeviceStub)
+        mangleDeviceStubName(II);
+      else if (IsRegCall)
         mangleRegCallName(II);
       else
         mangleSourceName(II);
@@ -1491,6 +1497,14 @@
       << II->getName();
 }
 
+void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
+  // <source-name> ::= <positive length number> __device_stub__ <identifier>
+  // <number> ::= [n] <non-negative decimal integer>
+  // <identifier> ::= <unqualified source code identifier>
+  Out << II->getLength() + sizeof("__device_stub__") - 1 << "__device_stub__"
+      << II->getName();
+}
+
 void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
   // <source-name> ::= <positive length number> <identifier>
   // <number> ::= [n] <non-negative decimal integer>
@@ -2734,6 +2748,7 @@
   case CC_OpenCLKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
+  case CC_DeviceStub:
     // FIXME: we should be mangling all of the above.
     return "";
 
Index: clang/include/clang/Basic/Specifiers.h
===================================================================
--- clang/include/clang/Basic/Specifiers.h
+++ clang/include/clang/Basic/Specifiers.h
@@ -263,24 +263,25 @@
 
   /// CallingConv - Specifies the calling convention that a function uses.
   enum CallingConv {
-    CC_C,           // __attribute__((cdecl))
-    CC_X86StdCall,  // __attribute__((stdcall))
-    CC_X86FastCall, // __attribute__((fastcall))
-    CC_X86ThisCall, // __attribute__((thiscall))
-    CC_X86VectorCall, // __attribute__((vectorcall))
-    CC_X86Pascal,   // __attribute__((pascal))
-    CC_Win64,       // __attribute__((ms_abi))
-    CC_X86_64SysV,  // __attribute__((sysv_abi))
-    CC_X86RegCall, // __attribute__((regcall))
-    CC_AAPCS,       // __attribute__((pcs("aapcs")))
-    CC_AAPCS_VFP,   // __attribute__((pcs("aapcs-vfp")))
-    CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
-    CC_SpirFunction, // default for OpenCL functions on SPIR target
-    CC_OpenCLKernel, // inferred for OpenCL kernels
-    CC_Swift,        // __attribute__((swiftcall))
-    CC_PreserveMost, // __attribute__((preserve_most))
-    CC_PreserveAll,  // __attribute__((preserve_all))
+    CC_C,                 // __attribute__((cdecl))
+    CC_X86StdCall,        // __attribute__((stdcall))
+    CC_X86FastCall,       // __attribute__((fastcall))
+    CC_X86ThisCall,       // __attribute__((thiscall))
+    CC_X86VectorCall,     // __attribute__((vectorcall))
+    CC_X86Pascal,         // __attribute__((pascal))
+    CC_Win64,             // __attribute__((ms_abi))
+    CC_X86_64SysV,        // __attribute__((sysv_abi))
+    CC_X86RegCall,        // __attribute__((regcall))
+    CC_AAPCS,             // __attribute__((pcs("aapcs")))
+    CC_AAPCS_VFP,         // __attribute__((pcs("aapcs-vfp")))
+    CC_IntelOclBicc,      // __attribute__((intel_ocl_bicc))
+    CC_SpirFunction,      // default for OpenCL functions on SPIR target
+    CC_OpenCLKernel,      // inferred for OpenCL kernels
+    CC_Swift,             // __attribute__((swiftcall))
+    CC_PreserveMost,      // __attribute__((preserve_most))
+    CC_PreserveAll,       // __attribute__((preserve_all))
     CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs))
+    CC_DeviceStub,        // inferred for HIP device stub
   };
 
   /// Checks whether the given calling convention supports variadic
@@ -296,6 +297,7 @@
     case CC_SpirFunction:
     case CC_OpenCLKernel:
     case CC_Swift:
+    case CC_DeviceStub:
       return false;
     default:
       return true;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to