================
@@ -346,3 +367,121 @@ mlir::Operation
*CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn,
return globalOp;
}
+
+void CIRGenNVCUDARuntime::internalizeDeviceSideVar(
+ const VarDecl *d, cir::GlobalLinkageKind &linkage) {
+ if (cgm.getLangOpts().GPURelocatableDeviceCode)
+ cgm.errorNYI(d->getSourceRange(),
+ "internalizeDeviceSideVar: GPU Relocatable Device Code
(RDC)");
+
+ // __shared__ variables are odd. Shadows do get created, but
+ // they are not registered with the CUDA runtime, so they
+ // can't really be used to access their device-side
+ // counterparts. It's not clear yet whether it's nvcc's bug or
+ // a feature, but we've got to do the same for compatibility.
+ if (d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() ||
+ d->hasAttr<CUDASharedAttr>()) {
+ linkage = cir::GlobalLinkageKind::InternalLinkage;
+ }
+
+ if (d->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ d->getType()->isCUDADeviceBuiltinTextureType())
+ cgm.errorNYI(d->getSourceRange(),
+ "internalizeDeviceSideVar: CUDA Surface/Texture support");
+}
+
+std::string CIRGenNVCUDARuntime::getDeviceSideName(const NamedDecl *nd) {
+ GlobalDecl gd;
+ // nd could be either a kernel or a variable.
+ if (auto *fd = dyn_cast<FunctionDecl>(nd))
+ gd = GlobalDecl(fd, KernelReferenceKind::Kernel);
+ else
+ gd = GlobalDecl(nd);
+ std::string deviceSideName;
+ MangleContext *mc;
+ if (cgm.getLangOpts().CUDAIsDevice)
+ mc = &cgm.getCXXABI().getMangleContext();
+ else
+ mc = deviceMC.get();
+ if (mc->shouldMangleDeclName(nd)) {
+ SmallString<256> buffer;
+ llvm::raw_svector_ostream out(buffer);
+ mc->mangleName(gd, out);
+ deviceSideName = std::string(out.str());
+ } else
+ deviceSideName = std::string(nd->getIdentifier()->getName());
+
+ // Make unique name for device side static file-scope variable for HIP.
+ if (cgm.getASTContext().shouldExternalize(nd) &&
+ cgm.getLangOpts().GPURelocatableDeviceCode) {
+ SmallString<256> buffer;
+ llvm::raw_svector_ostream out(buffer);
+ out << deviceSideName;
+ cgm.printPostfixForExternalizedDecl(out, nd);
+ deviceSideName = std::string(out.str());
+ }
+ return deviceSideName;
+}
+
+void CIRGenNVCUDARuntime::handleVarRegistration(const VarDecl *vd,
+ cir::GlobalOp var) {
+ if (vd->hasAttr<CUDADeviceAttr>() || vd->hasAttr<CUDAConstantAttr>()) {
+ // Shadow variables and their properties must be registered with CUDA
+ // runtime. Skip Extern global variables, which will be registered in
+ // the TU where they are defined.
+ //
+ // Don't register a C++17 inline variable. The local symbol can be
+ // discarded and referencing a discarded local symbol from outside the
+ // comdat (__cuda_register_globals) is disallowed by the ELF spec.
+ //
+ // HIP managed variables need to be always recorded in device and host
+ // compilations for transformation.
+ //
+ // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
+ // added to llvm.compiler-used, therefore they are safe to be registered.
+ if ((!vd->hasExternalStorage() && !vd->isInline()) ||
+ cgm.getASTContext().CUDADeviceVarODRUsedByHost.contains(vd) ||
+ vd->hasAttr<HIPManagedAttr>()) {
+ registerDeviceVar(vd, var, !vd->hasDefinition(),
+ vd->hasAttr<CUDAConstantAttr>());
+ }
+ } else if (vd->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ vd->getType()->isCUDADeviceBuiltinTextureType()) {
+ // Builtin surfaces and textures and their template arguments are
+ // also registered with CUDA runtime.
+ cgm.errorNYI(vd->getSourceRange(),
+ "handleVarRegistration: Surface and Texture registration");
+ }
+}
+
+void CIRGenNVCUDARuntime::finalizeModule() {
+ if (!cgm.getLangOpts().CUDAIsDevice)
+ return;
+
+ // Mark ODR-used device variables as compiler used to prevent them from being
+ // eliminated by optimization. This is necessary for device variables
+ // ODR-used by host functions. Sema correctly marks them as ODR-used no
+ // matter whether they are ODR-used by device or host functions.
+ //
+ // We do not need to do this if the variable has used attribute since it
+ // has already been added.
+ //
+ // Static device variables have been externalized at this point, therefore
+ // variables with private or internal linkage need not be added.
+ for (auto globalOp : cgm.getModule().getOps<cir::GlobalOp>()) {
+ auto regAttr = globalOp->getAttrOfType<cir::CUDAVarRegistrationInfoAttr>(
+ cir::CUDAVarRegistrationInfoAttr::getMnemonic());
+ if (!regAttr)
+ continue;
+
+ auto kind = regAttr.getKind();
+ if (!globalOp.isDeclaration() &&
+ !cir::isLocalLinkage(globalOp.getLinkage()) &&
+ (kind == cir::CUDADeviceVarKind::Variable ||
+ kind == cir::CUDADeviceVarKind::Surface ||
+ kind == cir::CUDADeviceVarKind::Texture)) {
----------------
RiverDave wrote:
I think we can make this closer to OG.
OG's condition depends on AST-side state (`Info.D->isUsed()` and
`!Info.D->hasAttr<UsedAttr>()` - see here:
https://github.com/llvm/llvm-project/blob/3053a3c7b8646c5a9892c339befcb61802488b9e/clang/lib/CodeGen/CGCUDANV.cpp#L1281),
and that information is not represented in `cu.var_registration`.
Could we keep an OG-style `DeviceVars` side table in `CIRGenNVCUDARuntime`
containing the `cir::GlobalOp`, `VarDecl *`, and kind? The attr can still be
attached for later lowering, but `finalizeModule` should use the side table
referencing the AST decl so this stays equivalent to OG.
https://github.com/llvm/llvm-project/pull/190087
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits