https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/176250
Summary: The old `amdgpu_kernel` and `nvptx_kernel` attributes are better replaced by the new, generic `device_kernel`. >From 5d55beb44ac52545a9c8ccabed415dabed20af88 Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Thu, 15 Jan 2026 15:32:09 -0600 Subject: [PATCH] [Clang][NFC] Replace device specific kernel attribute with generic one Summary: The old `amdgpu_kernel` and `nvptx_kernel` attributes are better replaced by the new, generic `device_kernel`. --- clang/lib/Headers/amdgpuintrin.h | 3 --- clang/lib/Headers/gpuintrin.h | 3 +++ clang/lib/Headers/nvptxintrin.h | 3 --- clang/lib/Headers/spirvintrin.h | 3 --- libc/startup/gpu/amdgpu/start.cpp | 4 ++-- libc/startup/gpu/nvptx/start.cpp | 6 +++--- 6 files changed, 8 insertions(+), 14 deletions(-) diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index f7fb8e2814180..e0989e0a2d097 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -27,9 +27,6 @@ _Pragma("omp begin declare variant match(device = {arch(amdgcn)})"); #define __gpu_global __attribute__((address_space(1))) #define __gpu_generic __attribute__((address_space(0))) -// Attribute to declare a function as a kernel. -#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected"))) - // Returns the number of workgroups in the 'x' dimension of the grid. _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x(); diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index f3cf2d0776c0c..010ec2264dc5f 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -69,6 +69,9 @@ _Pragma("omp end declare target"); _Pragma("omp begin declare target device_type(nohost)"); _Pragma("omp begin declare variant match(device = {kind(gpu)})"); +// Attribute to declare a function as a kernel. +#define __gpu_kernel __attribute__((device_kernel, visibility("protected"))) + #define __GPU_X_DIM 0 #define __GPU_Y_DIM 1 #define __GPU_Z_DIM 2 diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index fb811d0d58394..b2e538580ba10 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -31,9 +31,6 @@ _Pragma("omp begin declare variant match(device = {arch(nvptx64)})"); #define __gpu_global __attribute__((address_space(1))) #define __gpu_generic __attribute__((address_space(0))) -// Attribute to declare a function as a kernel. -#define __gpu_kernel __attribute__((nvptx_kernel, visibility("protected"))) - // Returns the number of CUDA blocks in the 'x' dimension. _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __nvvm_read_ptx_sreg_nctaid_x(); diff --git a/clang/lib/Headers/spirvintrin.h b/clang/lib/Headers/spirvintrin.h index 2a10a47adedde..9658f280b247d 100644 --- a/clang/lib/Headers/spirvintrin.h +++ b/clang/lib/Headers/spirvintrin.h @@ -27,9 +27,6 @@ _Pragma("omp begin declare variant match(device = {arch(spirv64)})"); #define __gpu_global __attribute__((address_space(1))) #define __gpu_generic __attribute__((address_space(4))) -// Attribute to declare a function as a kernel. -#define __gpu_kernel __attribute__((device_kernel, visibility("protected"))) - // Returns the number of workgroups in the 'x' dimension of the grid. _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __builtin_spirv_num_workgroups(0); diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp index 446eead4e3935..47c7e1ccf10bd 100644 --- a/libc/startup/gpu/amdgpu/start.cpp +++ b/libc/startup/gpu/amdgpu/start.cpp @@ -45,7 +45,7 @@ static void call_fini_array_callbacks() { } // namespace LIBC_NAMESPACE_DECL -extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel, +extern "C" [[gnu::visibility("protected"), clang::device_kernel, clang::amdgpu_flat_work_group_size(1, 1), clang::amdgpu_max_num_work_groups(1)]] void _begin(int argc, char **argv, char **env) { @@ -59,7 +59,7 @@ _begin(int argc, char **argv, char **env) { LIBC_NAMESPACE::call_init_array_callbacks(argc, argv, env); } -extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void +extern "C" [[gnu::visibility("protected"), clang::device_kernel]] void _start(int argc, char **argv, char **envp, int *ret) { // Invoke the 'main' function with every active thread that the user launched // the _start kernel with. diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp index be71bafa7c458..dc15b1be8c04f 100644 --- a/libc/startup/gpu/nvptx/start.cpp +++ b/libc/startup/gpu/nvptx/start.cpp @@ -51,7 +51,7 @@ static void call_fini_array_callbacks() { } // namespace LIBC_NAMESPACE_DECL -extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void +extern "C" [[gnu::visibility("protected"), clang::device_kernel]] void _begin(int argc, char **argv, char **env) { __atomic_store_n(&LIBC_NAMESPACE::app.env_ptr, reinterpret_cast<uintptr_t *>(env), __ATOMIC_RELAXED); @@ -64,14 +64,14 @@ _begin(int argc, char **argv, char **env) { LIBC_NAMESPACE::call_init_array_callbacks(argc, argv, env); } -extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void +extern "C" [[gnu::visibility("protected"), clang::device_kernel]] void _start(int argc, char **argv, char **envp, int *ret) { // Invoke the 'main' function with every active thread that the user launched // the _start kernel with. __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED); } -extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void _end() { +extern "C" [[gnu::visibility("protected"), clang::device_kernel]] void _end() { // Only a single thread should call the destructors registred with 'atexit'. // The loader utility will handle the actual exit and return code cleanly. __cxa_finalize(nullptr); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
