ashi1 created this revision. ashi1 added a reviewer: yaxunl. Herald added a project: clang. Herald added a subscriber: cfe-commits.
Do not emit the fat binary registration functions (the module constructor and destructor) for HIP when no device code is present, i.e. when no kernels or device variables were emitted. Repository: rC Clang https://reviews.llvm.org/D60141 Files: lib/CodeGen/CGCUDANV.cpp test/CodeGenCUDA/device-stub.cu Index: test/CodeGenCUDA/device-stub.cu =================================================================== --- test/CodeGenCUDA/device-stub.cu +++ test/CodeGenCUDA/device-stub.cu @@ -228,14 +228,21 @@ // device-side globals, but we still need to register GPU binary. // Skip GPU binary string first. // CUDANOGLOBALS: @{{.*}} = private constant{{.*}} -// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}} +// HIPNOGLOBALS-NOT: @{{.*}} = internal constant{{.*}} // NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals -// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor -// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper +// CUDANOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor +// CUDANOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper // NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals -// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor -// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary +// CUDANOGLOBALS: define internal void @__[[PREFIX]]_module_dtor +// CUDANOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary + +// There should be no fat binary functions when no device-code is found for HIP. +// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor +// HIPNOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper +// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor +// HIPNOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary + // There should be no constructors/destructors if we have no GPU binary. 
// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals // NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_ctor Index: lib/CodeGen/CGCUDANV.cpp =================================================================== --- lib/CodeGen/CGCUDANV.cpp +++ lib/CodeGen/CGCUDANV.cpp @@ -472,13 +472,15 @@ StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; if (CudaGpuBinaryFileName.empty() && !IsHIP) return nullptr; + if (IsHIP && EmittedKernels.empty() && DeviceVars.empty()) + return nullptr; // void __{cuda|hip}_register_globals(void* handle); llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); // We always need a function to pass in as callback. Create a dummy // implementation if we don't need to register anything. if (RelocatableDeviceCode && !RegisterGlobalsFunc) RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy()); // void ** __{cuda|hip}RegisterFatBinary(void *); llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
Index: test/CodeGenCUDA/device-stub.cu =================================================================== --- test/CodeGenCUDA/device-stub.cu +++ test/CodeGenCUDA/device-stub.cu @@ -228,14 +228,21 @@ // device-side globals, but we still need to register GPU binary. // Skip GPU binary string first. // CUDANOGLOBALS: @{{.*}} = private constant{{.*}} -// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}} +// HIPNOGLOBALS-NOT: @{{.*}} = internal constant{{.*}} // NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals -// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor -// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper +// CUDANOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor +// CUDANOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper // NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals -// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor -// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary +// CUDANOGLOBALS: define internal void @__[[PREFIX]]_module_dtor +// CUDANOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary + +// There should be no fat binary functions when no device-code is found for HIP. +// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor +// HIPNOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper +// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor +// HIPNOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary + // There should be no constructors/destructors if we have no GPU binary. 
// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals // NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_ctor Index: lib/CodeGen/CGCUDANV.cpp =================================================================== --- lib/CodeGen/CGCUDANV.cpp +++ lib/CodeGen/CGCUDANV.cpp @@ -472,13 +472,15 @@ StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; if (CudaGpuBinaryFileName.empty() && !IsHIP) return nullptr; + if (IsHIP && EmittedKernels.empty() && DeviceVars.empty()) + return nullptr; // void __{cuda|hip}_register_globals(void* handle); llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); // We always need a function to pass in as callback. Create a dummy // implementation if we don't need to register anything. if (RelocatableDeviceCode && !RegisterGlobalsFunc) RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy()); // void ** __{cuda|hip}RegisterFatBinary(void *); llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits