https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/184846
>From 5af503f1ab2839471cb38ed49ac40473df1d44ef Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Thu, 5 Mar 2026 18:26:23 +0100 Subject: [PATCH] libclc: Add sub_group_broadcast --- .../clc/subgroup/clc_subgroup_broadcast.inc | 10 +++ .../clc_subgroup_broadcast_scalarize.inc | 21 +++++ .../clc/subgroup/sub_group_broadcast.h | 22 +++++ libclc/clc/lib/amdgcn/SOURCES | 1 + .../amdgcn/subgroup/sub_group_broadcast.cl | 88 +++++++++++++++++++ libclc/opencl/lib/generic/SOURCES | 1 + .../generic/subgroup/sub_group_broadcast.cl | 15 ++++ .../generic/subgroup/sub_group_broadcast.inc | 16 ++++ 8 files changed, 174 insertions(+) create mode 100644 libclc/clc/include/clc/subgroup/clc_subgroup_broadcast.inc create mode 100644 libclc/clc/include/clc/subgroup/clc_subgroup_broadcast_scalarize.inc create mode 100644 libclc/clc/include/clc/subgroup/sub_group_broadcast.h create mode 100644 libclc/clc/lib/amdgcn/subgroup/sub_group_broadcast.cl create mode 100644 libclc/opencl/lib/generic/subgroup/sub_group_broadcast.cl create mode 100644 libclc/opencl/lib/generic/subgroup/sub_group_broadcast.inc diff --git a/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast.inc b/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast.inc new file mode 100644 index 0000000000000..7cf735ac22770 --- /dev/null +++ b/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast.inc @@ -0,0 +1,10 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+_CLC_OVERLOAD _CLC_DECL _CLC_CONST __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_GENTYPE a, uint sub_group_local_id);
diff --git a/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast_scalarize.inc b/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast_scalarize.inc
new file mode 100644
index 0000000000000..c2ef6ebf5131e
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast_scalarize.inc
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_VECSIZE_OR_1 >= 2
+// Vector overload: sends each component of x through the overload taking
+// __CLC_SCALAR_GENTYPE and reassembles the results into a vector.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+__clc_sub_group_broadcast(__CLC_GENTYPE x, uint sub_group_local_id) {
+  // The union gives array-style access to the components of the vector.
+  union {
+    __CLC_GENTYPE vec;
+    __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1];
+  } src, dst;
+  src.vec = x;
+  for (int elt = 0; elt < __CLC_VECSIZE_OR_1; ++elt) {
+    dst.arr[elt] = __clc_sub_group_broadcast(src.arr[elt], sub_group_local_id);
+  }
+  return dst.vec;
+}
+#endif
diff --git a/libclc/clc/include/clc/subgroup/sub_group_broadcast.h b/libclc/clc/include/clc/subgroup/sub_group_broadcast.h
new file mode 100644
index 0000000000000..1e17b078f4836
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/sub_group_broadcast.h
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__
+#define __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__
+
+#include "clc/internal/clc.h"
+
+#define __CLC_FUNCTION __clc_sub_group_broadcast
+
+#define __CLC_BODY <clc/subgroup/clc_subgroup_broadcast.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/subgroup/clc_subgroup_broadcast.inc>
+#include <clc/math/gentype.inc>
+
+#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__
diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index 7006f538d9270..959e4fb48e97a 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,6 +1,7 @@
 address_space/qualifier.cl
 math/clc_ldexp.cl
 mem_fence/clc_mem_fence.cl
+subgroup/sub_group_broadcast.cl
 synchronization/clc_work_group_barrier.cl
 workitem/clc_get_enqueued_local_size.cl
 workitem/clc_get_global_offset.cl
diff --git a/libclc/clc/lib/amdgcn/subgroup/sub_group_broadcast.cl b/libclc/clc/lib/amdgcn/subgroup/sub_group_broadcast.cl
new file mode 100644
index 0000000000000..005b1e8090373
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/subgroup/sub_group_broadcast.cl
@@ -0,0 +1,109 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/sub_group_broadcast.h"
+
+// AMDGCN implementation of __clc_sub_group_broadcast.
+//
+// Each scalar overload reads x from the lane selected by sub_group_local_id
+// using __builtin_amdgcn_readlane. The lane index is first passed through
+// __builtin_amdgcn_readfirstlane so that one uniform index is handed to
+// readlane. Every readlane call here moves a 32-bit uint: narrower integers
+// are widened and truncated back, 64-bit values are split into two 32-bit
+// halves, and floating-point values travel as their bit patterns so that
+// the value itself is never converted.
+
+// 8- and 16-bit integers are widened to uint for readlane and truncated back.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
+__clc_sub_group_broadcast(char x, uint sub_group_local_id) {
+  uint j = __builtin_amdgcn_readfirstlane(sub_group_local_id);
+  return (char)__builtin_amdgcn_readlane((uint)x, j);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
+__clc_sub_group_broadcast(uchar x, uint sub_group_local_id) {
+  uint j = __builtin_amdgcn_readfirstlane(sub_group_local_id);
+  return (uchar)__builtin_amdgcn_readlane((uint)x, j);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
+__clc_sub_group_broadcast(short x, uint sub_group_local_id) {
+  uint j = __builtin_amdgcn_readfirstlane(sub_group_local_id);
+  return (short)__builtin_amdgcn_readlane((uint)x, j);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
+__clc_sub_group_broadcast(ushort x, uint sub_group_local_id) {
+  uint j = __builtin_amdgcn_readfirstlane(sub_group_local_id);
+  return (ushort)__builtin_amdgcn_readlane((uint)x, j);
+}
+
+// int forwards to the uint overload; the casts only change signedness.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
+__clc_sub_group_broadcast(int x, uint sub_group_local_id) {
+  return (int)__clc_sub_group_broadcast((uint)x, sub_group_local_id);
+}
+
+// 32-bit base case: a single readlane.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
+__clc_sub_group_broadcast(uint x, uint sub_group_local_id) {
+  uint j = __builtin_amdgcn_readfirstlane(sub_group_local_id);
+  return __builtin_amdgcn_readlane(x, j);
+}
+
+// long forwards to the ulong overload; the casts only change signedness.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
+__clc_sub_group_broadcast(long x, uint sub_group_local_id) {
+  return (long)__clc_sub_group_broadcast((ulong)x, sub_group_local_id);
+}
+
+// 64-bit base case: the two 32-bit halves are read from the same lane.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
+__clc_sub_group_broadcast(ulong x, uint sub_group_local_id) {
+  uint j = __builtin_amdgcn_readfirstlane(sub_group_local_id);
+  uint2 as_vec = __builtin_astype(x, uint2);
+  as_vec.x = __builtin_amdgcn_readlane(as_vec.x, j);
+  as_vec.y = __builtin_amdgcn_readlane(as_vec.y, j);
+  return __builtin_astype(as_vec, ulong);
+}
+
+// half: the 16-bit pattern is broadcast through the uint overload.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST half
+__clc_sub_group_broadcast(half x, uint sub_group_local_id) {
+  ushort bitcast = __builtin_astype(x, ushort);
+  uint broadcast = __clc_sub_group_broadcast((uint)bitcast, sub_group_local_id);
+  return __builtin_astype((ushort)broadcast, half);
+}
+
+// float: the 32-bit pattern is broadcast through the uint overload.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST float
+__clc_sub_group_broadcast(float x, uint sub_group_local_id) {
+  uint broadcast =
+      __clc_sub_group_broadcast(__builtin_astype(x, uint), sub_group_local_id);
+  return __builtin_astype(broadcast, float);
+}
+
+// double: the 64-bit pattern is broadcast through the ulong overload. The
+// intermediate has to be a ulong bit pattern so that all 64 bits of x are
+// carried across unchanged.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST double
+__clc_sub_group_broadcast(double x, uint sub_group_local_id) {
+  ulong bitcast = __builtin_astype(x, ulong);
+  ulong broadcast = __clc_sub_group_broadcast(bitcast, sub_group_local_id);
+  return __builtin_astype(broadcast, double);
+}
+
+// Instantiate the vector overloads, which broadcast component by component.
+#define __CLC_FUNCTION __clc_sub_group_broadcast
+#define __CLC_ARG2_TYPE uint
+
+#define __CLC_BODY <clc/subgroup/clc_subgroup_broadcast_scalarize.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/subgroup/clc_subgroup_broadcast_scalarize.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES
index 43f70cf37a377..312657f3bf106 100644
--- a/libclc/opencl/lib/generic/SOURCES
+++ b/libclc/opencl/lib/generic/SOURCES
@@ -200,6 +200,7 @@ shared/max.cl
 shared/min.cl
 shared/vload.cl
 shared/vstore.cl
+subgroup/sub_group_broadcast.cl
 synchronization/work_group_barrier.cl
 workitem/get_enqueued_local_size.cl
 workitem/get_global_id.cl
diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.cl b/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.cl
new file mode 100644
index 0000000000000..a2e4fae6795cc
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0
with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/sub_group_broadcast.h"
+
+#define __CLC_BODY <sub_group_broadcast.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <sub_group_broadcast.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.inc b/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.inc
new file mode 100644
index 0000000000000..7fa1c5cd27d14
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.inc
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// OpenCL entry point: forwards to __clc_sub_group_broadcast. Instantiations
+// with __CLC_SCALAR defined are always emitted; all others are emitted only
+// when cl_khr_subgroup_extended_types is defined.
+#if defined(cl_khr_subgroup_extended_types) || defined(__CLC_SCALAR)
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
+sub_group_broadcast(__CLC_GENTYPE x, uint sub_group_local_id) {
+  const __CLC_GENTYPE broadcast_value =
+      __clc_sub_group_broadcast(x, sub_group_local_id);
+  return broadcast_value;
+}
+
+#endif
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
