tra added a comment. LGTM.
================ Comment at: clang/lib/CodeGen/CodeGenModule.cpp:4270-4271 } else { - // Host-side shadows of external declarations of device-side - // global variables become internal definitions. These have to - // be internal in order to prevent name conflicts with global - // host variables with the same name in a different TUs. - if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) { - Linkage = llvm::GlobalValue::InternalLinkage; - // Shadow variables and their properties must be registered with CUDA - // runtime. Skip Extern global variables, which will be registered in - // the TU where they are defined. - // - // Don't register a C++17 inline variable. The local symbol can be - // discarded and referencing a discarded local symbol from outside the - // comdat (__cuda_register_globals) is disallowed by the ELF spec. - // TODO: Reject __device__ constexpr and __device__ inline in Sema. - if (!D->hasExternalStorage() && !D->isInline()) - getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), - D->hasAttr<CUDAConstantAttr>()); - } else if (D->hasAttr<CUDASharedAttr>()) { - // __shared__ variables are odd. Shadows do get created, but - // they are not registered with the CUDA runtime, so they - // can't really be used to access their device-side - // counterparts. It's not clear yet whether it's nvcc's bug or - // a feature, but we've got to do the same for compatibility. - Linkage = llvm::GlobalValue::InternalLinkage; - } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() || - D->getType()->isCUDADeviceBuiltinTextureType()) { - // Builtin surfaces and textures and their template arguments are - // also registered with CUDA runtime. - Linkage = llvm::GlobalValue::InternalLinkage; - const ClassTemplateSpecializationDecl *TD = - cast<ClassTemplateSpecializationDecl>( - D->getType()->getAs<RecordType>()->getDecl()); - const TemplateArgumentList &Args = TD->getTemplateArgs(); - if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) { - assert(Args.size() == 2 && - "Unexpected number of template arguments of CUDA device " - "builtin surface type."); - auto SurfType = Args[1].getAsIntegral(); - if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(), - SurfType.getSExtValue()); - } else { - assert(Args.size() == 3 && - "Unexpected number of template arguments of CUDA device " - "builtin texture type."); - auto TexType = Args[1].getAsIntegral(); - auto Normalized = Args[2].getAsIntegral(); - if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(), - TexType.getSExtValue(), - Normalized.getZExtValue()); - } - } + getCUDARuntime().internalizeDeviceSideVars(D, Linkage); + getCUDARuntime().handleVarRegistration(D, *GV); } ---------------- yaxunl wrote: > tra wrote: > > yaxunl wrote: > > > tra wrote: > > > > Should we fold `internalizeDeviceSideVar` into `handleVarRegistration` > > > > or call it from there? > > > > I don't think we'll have any independent use for it in CGM. It seems > > > > to be an implementation detail for `handleVarRegistration` and may not > > > > even need to be virtual. > > > > > > > For function scope static variable, I only need to call > > > handleVarRegistration since the variable already has internal linkage. If > > > internalizeDeviceSideVars is absorbed into handleVarRegistration, there > > > be useless work and I also need to define a useless automatic variable > > > `Linkage` and pass it to handleVarRegistration. > > `internalizeDeviceSideVars` is nearly trivial. I doubt that it will have > > any measureable impact, either way. > > > > At the moment `handleVarRegistration()` is called only from here. Are you > > saying that separate internalization and registration will be needed in the > > future changes? If that's the case I'm fine keeping them separate. > > > > As things are in the patch, I do not see why `internalizeDeviceSideVars` > > needs to be a base class interface. It's always used along with > > `handleVarRegistration` and we have no need to be able to override it > > independently of it. Folding it into or calling it from > > `handleVarRegistration` looks like a natural fit. > > > > > It is used differently in https://reviews.llvm.org/D95560, where only > handleVarRegistration is called. Got it. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D95558/new/ https://reviews.llvm.org/D95558 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits