Author: Joseph Huber Date: 2026-01-15T17:26:51-06:00 New Revision: a99a0023301ebc4226d709c164df04d073dec102
URL: https://github.com/llvm/llvm-project/commit/a99a0023301ebc4226d709c164df04d073dec102 DIFF: https://github.com/llvm/llvm-project/commit/a99a0023301ebc4226d709c164df04d073dec102.diff LOG: [Clang][NFC] Replace device specific kernel attribute with generic one (#176250) Summary: The old `amdgpu_kernel` and `nvptx_kernel` attributes are better replaced by the new, generic `device_kernel`. Added: Modified: clang/lib/Headers/amdgpuintrin.h clang/lib/Headers/gpuintrin.h clang/lib/Headers/nvptxintrin.h clang/lib/Headers/spirvintrin.h libc/startup/gpu/amdgpu/start.cpp libc/startup/gpu/nvptx/start.cpp Removed: ################################################################################ diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index f7fb8e2814180..e0989e0a2d097 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -27,9 +27,6 @@ _Pragma("omp begin declare variant match(device = {arch(amdgcn)})"); #define __gpu_global __attribute__((address_space(1))) #define __gpu_generic __attribute__((address_space(0))) -// Attribute to declare a function as a kernel. -#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected"))) - // Returns the number of workgroups in the 'x' dimension of the grid. _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x(); diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index f3cf2d0776c0c..010ec2264dc5f 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -69,6 +69,9 @@ _Pragma("omp end declare target"); _Pragma("omp begin declare target device_type(nohost)"); _Pragma("omp begin declare variant match(device = {kind(gpu)})"); +// Attribute to declare a function as a kernel. +#define __gpu_kernel __attribute__((device_kernel, visibility("protected"))) + #define __GPU_X_DIM 0 #define __GPU_Y_DIM 1 #define __GPU_Z_DIM 2 diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index fb811d0d58394..b2e538580ba10 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -31,9 +31,6 @@ _Pragma("omp begin declare variant match(device = {arch(nvptx64)})"); #define __gpu_global __attribute__((address_space(1))) #define __gpu_generic __attribute__((address_space(0))) -// Attribute to declare a function as a kernel. -#define __gpu_kernel __attribute__((nvptx_kernel, visibility("protected"))) - // Returns the number of CUDA blocks in the 'x' dimension. _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __nvvm_read_ptx_sreg_nctaid_x(); diff --git a/clang/lib/Headers/spirvintrin.h b/clang/lib/Headers/spirvintrin.h index 2a10a47adedde..9658f280b247d 100644 --- a/clang/lib/Headers/spirvintrin.h +++ b/clang/lib/Headers/spirvintrin.h @@ -27,9 +27,6 @@ _Pragma("omp begin declare variant match(device = {arch(spirv64)})"); #define __gpu_global __attribute__((address_space(1))) #define __gpu_generic __attribute__((address_space(4))) -// Attribute to declare a function as a kernel. -#define __gpu_kernel __attribute__((device_kernel, visibility("protected"))) - // Returns the number of workgroups in the 'x' dimension of the grid. _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __builtin_spirv_num_workgroups(0); diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp index 446eead4e3935..ef627494fde5d 100644 --- a/libc/startup/gpu/amdgpu/start.cpp +++ b/libc/startup/gpu/amdgpu/start.cpp @@ -45,7 +45,7 @@ static void call_fini_array_callbacks() { } // namespace LIBC_NAMESPACE_DECL -extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel, +extern "C" [[gnu::visibility("protected"), clang::device_kernel, clang::amdgpu_flat_work_group_size(1, 1), clang::amdgpu_max_num_work_groups(1)]] void _begin(int argc, char **argv, char **env) { @@ -59,14 +59,14 @@ _begin(int argc, char **argv, char **env) { LIBC_NAMESPACE::call_init_array_callbacks(argc, argv, env); } -extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void +extern "C" [[gnu::visibility("protected"), clang::device_kernel]] void _start(int argc, char **argv, char **envp, int *ret) { // Invoke the 'main' function with every active thread that the user launched // the _start kernel with. __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED); } -extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel, +extern "C" [[gnu::visibility("protected"), clang::device_kernel, clang::amdgpu_flat_work_group_size(1, 1), clang::amdgpu_max_num_work_groups(1)]] void _end() { diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp index be71bafa7c458..dc15b1be8c04f 100644 --- a/libc/startup/gpu/nvptx/start.cpp +++ b/libc/startup/gpu/nvptx/start.cpp @@ -51,7 +51,7 @@ static void call_fini_array_callbacks() { } // namespace LIBC_NAMESPACE_DECL -extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void +extern "C" [[gnu::visibility("protected"), clang::device_kernel]] void _begin(int argc, char **argv, char **env) { __atomic_store_n(&LIBC_NAMESPACE::app.env_ptr, reinterpret_cast<uintptr_t *>(env), __ATOMIC_RELAXED); @@ -64,14 +64,14 @@ _begin(int argc, char **argv, char **env) { LIBC_NAMESPACE::call_init_array_callbacks(argc, argv, env); } -extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void +extern "C" [[gnu::visibility("protected"), clang::device_kernel]] void _start(int argc, char **argv, char **envp, int *ret) { // Invoke the 'main' function with every active thread that the user launched // the _start kernel with. __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED); } -extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void _end() { +extern "C" [[gnu::visibility("protected"), clang::device_kernel]] void _end() { // Only a single thread should call the destructors registred with 'atexit'. // The loader utility will handle the actual exit and return code cleanly. __cxa_finalize(nullptr); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
