tra created this revision. tra added a reviewer: jlebar. Herald added subscribers: sanjoy.google, bixia, yaxunl. Herald added a project: clang. tra requested review of this revision.
This is needed to compile some headers in CUDA-11 that assume that threadIdx is implicitly convertible to dim3. With NVCC, threadIdx is a uint3 and there is a dim3(uint3) constructor, so that works. Clang uses a special type for threadIdx, so dim3 can't be constructed from it. Instead, this patch adds conversion functions to the builtin variable classes. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D88250 Files: clang/lib/Headers/__clang_cuda_builtin_vars.h clang/lib/Headers/__clang_cuda_runtime_wrapper.h
Index: clang/lib/Headers/__clang_cuda_runtime_wrapper.h =================================================================== --- clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -377,28 +377,36 @@ // Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to // come after we've pulled in the definition of uint3 and dim3. +__device__ inline __cuda_builtin_threadIdx_t::operator dim3() const { + return {x, y, z}; +} + __device__ inline __cuda_builtin_threadIdx_t::operator uint3() const { - uint3 ret; - ret.x = x; - ret.y = y; - ret.z = z; - return ret; + return {x, y, z}; +} + +__device__ inline __cuda_builtin_blockIdx_t::operator dim3() const { + return {x, y, z}; } __device__ inline __cuda_builtin_blockIdx_t::operator uint3() const { - uint3 ret; - ret.x = x; - ret.y = y; - ret.z = z; - return ret; + return {x, y, z}; } __device__ inline __cuda_builtin_blockDim_t::operator dim3() const { - return dim3(x, y, z); + return {x, y, z}; +} + +__device__ inline __cuda_builtin_blockDim_t::operator uint3() const { + return {x, y, z}; } __device__ inline __cuda_builtin_gridDim_t::operator dim3() const { - return dim3(x, y, z); + return {x, y, z}; +} + +__device__ inline __cuda_builtin_gridDim_t::operator uint3() const { + return {x, y, z}; } #include <__clang_cuda_cmath.h> Index: clang/lib/Headers/__clang_cuda_builtin_vars.h =================================================================== --- clang/lib/Headers/__clang_cuda_builtin_vars.h +++ clang/lib/Headers/__clang_cuda_builtin_vars.h @@ -55,7 +55,9 @@ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. 
+ __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; + private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); }; @@ -66,7 +68,9 @@ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. + __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; + private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); }; @@ -78,6 +82,8 @@ // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; + __attribute__((device)) operator uint3() const; + private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); }; @@ -89,6 +95,8 @@ // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; + __attribute__((device)) operator uint3() const; + private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); }; @@ -108,5 +116,6 @@ #undef __CUDA_DEVICE_BUILTIN #undef __CUDA_BUILTIN_VAR #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS +#undef __DELETE #endif /* __CUDA_BUILTIN_VARS_H */
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits