Author: Matt Arsenault Date: 2026-03-06T09:04:35+01:00 New Revision: 049efc73bed254fb34683615926a997b0bf30243
URL: https://github.com/llvm/llvm-project/commit/049efc73bed254fb34683615926a997b0bf30243 DIFF: https://github.com/llvm/llvm-project/commit/049efc73bed254fb34683615926a997b0bf30243.diff LOG: libclc: Add amdgpu subgroup functions (#184845) Added: libclc/opencl/lib/amdgcn/subgroup/subgroup.cl libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl Modified: libclc/opencl/lib/amdgcn/SOURCES Removed: ################################################################################ diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES index 84fc4a6650c32..9b3ddf192e3dc 100644 --- a/libclc/opencl/lib/amdgcn/SOURCES +++ b/libclc/opencl/lib/amdgcn/SOURCES @@ -1,4 +1,6 @@ mem_fence/fence.cl +subgroup/subgroup.cl +synchronization/sub_group_barrier.cl workitem/get_global_offset.cl workitem/get_group_id.cl workitem/get_global_size.cl diff --git a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl new file mode 100644 index 0000000000000..d67d84e763b4f --- /dev/null +++ b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl @@ -0,0 +1,60 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/opencl/opencl-base.h> + +static uint wavesize_log2() { + return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5; +} + +static uint workgroup_size() { + return mul24((uint)get_local_size(2), + mul24((uint)get_local_size(1), (uint)get_local_size(0))); +} + +static uint enqueued_workgroup_size() { + return mul24((uint)get_enqueued_local_size(2), + mul24((uint)get_enqueued_local_size(1), + (uint)get_enqueued_local_size(0))); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { + uint wavesize = __builtin_amdgcn_wavefrontsize(); + uint lid = (uint)get_local_linear_id(); + return min(wavesize, workgroup_size() - (lid & ~(wavesize - 1))); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { + return min(__builtin_amdgcn_wavefrontsize(), enqueued_workgroup_size()); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { + return (workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >> + wavesize_log2(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { + return (enqueued_workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >> + wavesize_log2(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { + return (uint)get_local_linear_id() >> wavesize_log2(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { + return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) { + return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) { + return __builtin_amdgcn_ballot_w64(x) != 0; +} diff --git a/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl new file mode 100644 index 0000000000000..2b57d86294ef5 --- /dev/null +++ b/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/opencl/opencl-base.h> + +_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags, + memory_scope scope) { + __builtin_amdgcn_wave_barrier(); + + if (flags) + atomic_work_item_fence(flags, memory_order_acq_rel, scope); +} + +_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags) { + sub_group_barrier(flags, memory_scope_sub_group); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
