Author: Matt Arsenault Date: 2026-03-08T09:24:33+01:00 New Revision: 78c6ebd2cdfc0fc810448d0fc23a18bc39b363cf
URL: https://github.com/llvm/llvm-project/commit/78c6ebd2cdfc0fc810448d0fc23a18bc39b363cf DIFF: https://github.com/llvm/llvm-project/commit/78c6ebd2cdfc0fc810448d0fc23a18bc39b363cf.diff LOG: libclc: Move subgroup functions into clc (#185220) It turns out there was a generic implementation of the id and sizes. The practice of splitting every single function into its own file is kind of a pain here, so introduce a utility header for amdgpu. Added: libclc/clc/include/clc/amdgpu/amdgpu_utils.h libclc/clc/include/clc/subgroup/clc_subgroup.h libclc/clc/lib/amdgcn/subgroup/subgroup.cl libclc/clc/lib/amdgcn/workitem/clc_get_num_sub_groups.cl libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_id.cl libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_size.cl libclc/opencl/lib/generic/subgroup/subgroup.cl Modified: libclc/clc/lib/amdgcn/SOURCES libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl libclc/opencl/lib/amdgcn/SOURCES libclc/opencl/lib/generic/SOURCES Removed: libclc/opencl/lib/amdgcn/subgroup/subgroup.cl ################################################################################ diff --git a/libclc/clc/include/clc/amdgpu/amdgpu_utils.h b/libclc/clc/include/clc/amdgpu/amdgpu_utils.h new file mode 100644 index 0000000000000..40c5d770c7bde --- /dev/null +++ b/libclc/clc/include/clc/amdgpu/amdgpu_utils.h @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/integer/clc_mul24.h" +#include "clc/workitem/clc_get_enqueued_local_size.h" +#include "clc/workitem/clc_get_local_size.h" + +static inline uint __clc_amdgpu_workgroup_size() { + return __clc_mul24((uint)__clc_get_local_size(2), + __clc_mul24((uint)__clc_get_local_size(1), + (uint)__clc_get_local_size(0))); +} + +static inline uint __clc_amdgpu_enqueued_workgroup_size() { + return __clc_mul24((uint)__clc_get_enqueued_local_size(2), + __clc_mul24((uint)__clc_get_enqueued_local_size(1), + (uint)__clc_get_enqueued_local_size(0))); +} + +static inline uint __clc_amdgpu_wavesize_log2() { + return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5; +} diff --git a/libclc/clc/include/clc/subgroup/clc_subgroup.h b/libclc/clc/include/clc/subgroup/clc_subgroup.h new file mode 100644 index 0000000000000..f0a2a11d48445 --- /dev/null +++ b/libclc/clc/include/clc/subgroup/clc_subgroup.h @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_SUBGROUP_H__ +#define __CLC_SUBGROUP_CLC_SUB_GROUP_SUBGROUP_H__ + +#include "clc/internal/clc.h" + +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_max_sub_group_size(void); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_any(int x); + +#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_SUBGROUP_H__ diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES index a280461b1664a..a0b6c168b207e 100644 --- a/libclc/clc/lib/amdgcn/SOURCES +++ b/libclc/clc/lib/amdgcn/SOURCES @@ -1,6 +1,7 @@ address_space/qualifier.cl math/clc_ldexp.cl mem_fence/clc_mem_fence.cl +subgroup/subgroup.cl subgroup/sub_group_broadcast.cl synchronization/clc_sub_group_barrier.cl synchronization/clc_work_group_barrier.cl @@ -12,4 +13,7 @@ workitem/clc_get_local_id.cl workitem/clc_get_local_size.cl workitem/clc_get_max_sub_group_size.cl workitem/clc_get_num_groups.cl +workitem/clc_get_num_sub_groups.cl +workitem/clc_get_sub_group_id.cl +workitem/clc_get_sub_group_size.cl workitem/clc_get_work_dim.cl diff --git a/libclc/clc/lib/amdgcn/subgroup/subgroup.cl b/libclc/clc/lib/amdgcn/subgroup/subgroup.cl new file mode 100644 index 0000000000000..71f4abc42e895 --- /dev/null +++ b/libclc/clc/lib/amdgcn/subgroup/subgroup.cl @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/subgroup/clc_subgroup.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { + return (__clc_amdgpu_enqueued_workgroup_size() + + __builtin_amdgcn_wavefrontsize() - 1) >> + __clc_amdgpu_wavesize_log2(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) { + return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x) { + return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_any(int x) { + return __builtin_amdgcn_ballot_w64(x) != 0; +} diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl index cc56f8d9c325d..7df7f21d9098f 100644 --- a/libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// -#include <clc/workitem/clc_get_max_sub_group_size.h> +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/shared/clc_min.h" +#include "clc/workitem/clc_get_max_sub_group_size.h" _CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() { - return __builtin_amdgcn_wavefrontsize(); + return __clc_min(__builtin_amdgcn_wavefrontsize(), + __clc_amdgpu_enqueued_workgroup_size()); } diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_num_sub_groups.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_num_sub_groups.cl new file mode 100644 index 0000000000000..cb71ef282466b --- /dev/null +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_num_sub_groups.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/subgroup/clc_subgroup.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void) { + uint group_size = __clc_amdgpu_workgroup_size(); + return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >> + __clc_amdgpu_wavesize_log2(); +} diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_id.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_id.cl new file mode 100644 index 0000000000000..ba3baf98bda14 --- /dev/null +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_id.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/workitem/clc_get_local_linear_id.h" +#include "clc/workitem/clc_get_sub_group_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void) { + return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2(); +} diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_size.cl new file mode 100644 index 0000000000000..77c9f8e91d8ee --- /dev/null +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_size.cl @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/shared/clc_min.h" +#include "clc/workitem/clc_get_local_linear_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void) { + uint wavesize = __builtin_amdgcn_wavefrontsize(); + uint lid = (uint)__clc_get_local_linear_id(); + return __clc_min(wavesize, + __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1))); +} diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES index 7010953d28100..78877425504d6 100644 --- a/libclc/opencl/lib/amdgcn/SOURCES +++ b/libclc/opencl/lib/amdgcn/SOURCES @@ -1,3 +1,2 @@ async/wait_group_events.cl printf/__printf_alloc.cl -subgroup/subgroup.cl diff --git a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl deleted file mode 100644 index d67d84e763b4f..0000000000000 --- a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl +++ /dev/null @@ -1,60 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/opencl/opencl-base.h> - -static uint wavesize_log2() { - return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5; -} - -static uint workgroup_size() { - return mul24((uint)get_local_size(2), - mul24((uint)get_local_size(1), (uint)get_local_size(0))); -} - -static uint enqueued_workgroup_size() { - return mul24((uint)get_enqueued_local_size(2), - mul24((uint)get_enqueued_local_size(1), - (uint)get_enqueued_local_size(0))); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { - uint wavesize = __builtin_amdgcn_wavefrontsize(); - uint lid = (uint)get_local_linear_id(); - return min(wavesize, workgroup_size() - (lid & ~(wavesize - 1))); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { - return min(__builtin_amdgcn_wavefrontsize(), enqueued_workgroup_size()); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { - return (workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >> - wavesize_log2(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { - return (enqueued_workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >> - wavesize_log2(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { - return (uint)get_local_linear_id() >> wavesize_log2(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { - return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) { - return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) { - return __builtin_amdgcn_ballot_w64(x) != 0; -} diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES index 8e2df4a3e910a..f735c66548c30 100644 --- a/libclc/opencl/lib/generic/SOURCES +++ b/libclc/opencl/lib/generic/SOURCES @@ -201,6 +201,7 @@ shared/max.cl shared/min.cl shared/vload.cl shared/vstore.cl +subgroup/subgroup.cl subgroup/sub_group_broadcast.cl synchronization/sub_group_barrier.cl synchronization/work_group_barrier.cl diff --git a/libclc/opencl/lib/generic/subgroup/subgroup.cl b/libclc/opencl/lib/generic/subgroup/subgroup.cl new file mode 100644 index 0000000000000..fd552ada4afaf --- /dev/null +++ b/libclc/opencl/lib/generic/subgroup/subgroup.cl @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/subgroup/clc_subgroup.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { + return __clc_get_sub_group_size(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { + return __clc_get_max_sub_group_size(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { + return __clc_get_num_sub_groups(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { + return __clc_get_enqueued_num_sub_groups(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { + return __clc_get_sub_group_id(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { + return __clc_get_sub_group_local_id(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) { + return __clc_sub_group_all(x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) { + return __clc_sub_group_any(x); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
