https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/184845
>From 96af4c36b111c125f906a69cd3b64bf25a57d36b Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Thu, 5 Mar 2026 15:53:22 +0100 Subject: [PATCH] libclc: Add amdgpu subgroup functions --- libclc/opencl/lib/amdgcn/SOURCES | 2 + libclc/opencl/lib/amdgcn/subgroup/subgroup.cl | 60 +++++++++++++++++++ .../synchronization/sub_group_barrier.cl | 21 +++++++ 3 files changed, 83 insertions(+) create mode 100644 libclc/opencl/lib/amdgcn/subgroup/subgroup.cl create mode 100644 libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES index 84fc4a6650c32..9b3ddf192e3dc 100644 --- a/libclc/opencl/lib/amdgcn/SOURCES +++ b/libclc/opencl/lib/amdgcn/SOURCES @@ -1,4 +1,6 @@ mem_fence/fence.cl +subgroup/subgroup.cl +synchronization/sub_group_barrier.cl workitem/get_global_offset.cl workitem/get_group_id.cl workitem/get_global_size.cl diff --git a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl new file mode 100644 index 0000000000000..d67d84e763b4f --- /dev/null +++ b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl @@ -0,0 +1,60 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/opencl/opencl-base.h> + +static uint wavesize_log2() { + return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5; +} + +static uint workgroup_size() { + return mul24((uint)get_local_size(2), + mul24((uint)get_local_size(1), (uint)get_local_size(0))); +} + +static uint enqueued_workgroup_size() { + return mul24((uint)get_enqueued_local_size(2), + mul24((uint)get_enqueued_local_size(1), + (uint)get_enqueued_local_size(0))); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { + uint wavesize = __builtin_amdgcn_wavefrontsize(); + uint lid = (uint)get_local_linear_id(); + return min(wavesize, workgroup_size() - (lid & ~(wavesize - 1))); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { + return min(__builtin_amdgcn_wavefrontsize(), enqueued_workgroup_size()); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { + return (workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >> + wavesize_log2(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { + return (enqueued_workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >> + wavesize_log2(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { + return (uint)get_local_linear_id() >> wavesize_log2(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { + return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) { + return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) { + return __builtin_amdgcn_ballot_w64(x) != 0; +} diff --git a/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl new file mode 100644 index 0000000000000..2b57d86294ef5 --- /dev/null +++ b/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/opencl/opencl-base.h> + +_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags, + memory_scope scope) { + __builtin_amdgcn_wave_barrier(); + + if (flags) + atomic_work_item_fence(flags, memory_order_acq_rel, scope); +} + +_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags) { + sub_group_barrier(flags, memory_scope_sub_group); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
