https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/184846

>From 5af503f1ab2839471cb38ed49ac40473df1d44ef Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Thu, 5 Mar 2026 18:26:23 +0100
Subject: [PATCH] libclc: Add sub_group_broadcast

---
 .../clc/subgroup/clc_subgroup_broadcast.inc   | 10 +++
 .../clc_subgroup_broadcast_scalarize.inc      | 21 +++++
 .../clc/subgroup/sub_group_broadcast.h        | 22 +++++
 libclc/clc/lib/amdgcn/SOURCES                 |  1 +
 .../amdgcn/subgroup/sub_group_broadcast.cl    | 88 +++++++++++++++++++
 libclc/opencl/lib/generic/SOURCES             |  1 +
 .../generic/subgroup/sub_group_broadcast.cl   | 15 ++++
 .../generic/subgroup/sub_group_broadcast.inc  | 16 ++++
 8 files changed, 174 insertions(+)
 create mode 100644 libclc/clc/include/clc/subgroup/clc_subgroup_broadcast.inc
 create mode 100644 libclc/clc/include/clc/subgroup/clc_subgroup_broadcast_scalarize.inc
 create mode 100644 libclc/clc/include/clc/subgroup/sub_group_broadcast.h
 create mode 100644 libclc/clc/lib/amdgcn/subgroup/sub_group_broadcast.cl
 create mode 100644 libclc/opencl/lib/generic/subgroup/sub_group_broadcast.cl
 create mode 100644 libclc/opencl/lib/generic/subgroup/sub_group_broadcast.inc

diff --git a/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast.inc b/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast.inc
new file mode 100644
index 0000000000000..7cf735ac22770
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast.inc
@@ -0,0 +1,10 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Declares the __CLC_FUNCTION overload for the current __CLC_GENTYPE. It takes
+// a value of that type and a uint sub_group_local_id, and returns that type.
+_CLC_OVERLOAD _CLC_DECL _CLC_CONST __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_GENTYPE a, uint sub_group_local_id);
diff --git a/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast_scalarize.inc b/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast_scalarize.inc
new file mode 100644
index 0000000000000..c2ef6ebf5131e
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/clc_subgroup_broadcast_scalarize.inc
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_VECSIZE_OR_1 >= 2
+// Vector overload of __clc_sub_group_broadcast: each component of x is passed
+// through the scalar overload on its own and the results are reassembled into
+// a vector of the same type. Only emitted for vector sizes of 2 and above.
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
+__clc_sub_group_broadcast(__CLC_GENTYPE x, uint sub_group_local_id) {
+  // The union provides array-style access to the components of the vector.
+  union {
+    __CLC_GENTYPE vec;
+    __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1];
+  } u_x, u_result;
+  u_x.vec = x;
+  for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i)
+    u_result.arr[i] = __clc_sub_group_broadcast(u_x.arr[i], sub_group_local_id);
+  return u_result.vec;
+}
+#endif
diff --git a/libclc/clc/include/clc/subgroup/sub_group_broadcast.h b/libclc/clc/include/clc/subgroup/sub_group_broadcast.h
new file mode 100644
index 0000000000000..1e17b078f4836
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/sub_group_broadcast.h
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__
+#define __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__
+
+#include "clc/internal/clc.h"
+
+// Name substituted for __CLC_FUNCTION in the declaration body included below.
+#define __CLC_FUNCTION __clc_sub_group_broadcast
+
+// Include the declaration body through the integer gentype header...
+#define __CLC_BODY <clc/subgroup/clc_subgroup_broadcast.inc>
+#include <clc/integer/gentype.inc>
+
+// ...and again through the math gentype header.
+#define __CLC_BODY <clc/subgroup/clc_subgroup_broadcast.inc>
+#include <clc/math/gentype.inc>
+
+// Do not leak the helper macro into files that include this header.
+#undef __CLC_FUNCTION
+
+#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__
diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index 7006f538d9270..959e4fb48e97a 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,6 +1,7 @@
 address_space/qualifier.cl
 math/clc_ldexp.cl
 mem_fence/clc_mem_fence.cl
+subgroup/sub_group_broadcast.cl
 synchronization/clc_work_group_barrier.cl
 workitem/clc_get_enqueued_local_size.cl
 workitem/clc_get_global_offset.cl
diff --git a/libclc/clc/lib/amdgcn/subgroup/sub_group_broadcast.cl b/libclc/clc/lib/amdgcn/subgroup/sub_group_broadcast.cl
new file mode 100644
index 0000000000000..005b1e8090373
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/subgroup/sub_group_broadcast.cl
@@ -0,0 +1,88 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/sub_group_broadcast.h"
+
+// char overload: widens x to uint, reuses the uint overload and narrows back.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST char
+__clc_sub_group_broadcast(char x, uint sub_group_local_id) {
+  return (char)__clc_sub_group_broadcast((uint)x, sub_group_local_id);
+}
+
+// uchar overload: widens x to uint, reuses the uint overload and narrows back.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar
+__clc_sub_group_broadcast(uchar x, uint sub_group_local_id) {
+  return (uchar)__clc_sub_group_broadcast((uint)x, sub_group_local_id);
+}
+
+// short overload: widens x to uint, reuses the uint overload and narrows back.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST short
+__clc_sub_group_broadcast(short x, uint sub_group_local_id) {
+  return (short)__clc_sub_group_broadcast((uint)x, sub_group_local_id);
+}
+
+// ushort overload: widens x to uint, reuses the uint overload, narrows back.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort
+__clc_sub_group_broadcast(ushort x, uint sub_group_local_id) {
+  return (ushort)__clc_sub_group_broadcast((uint)x, sub_group_local_id);
+}
+
+// int overload: forwards to the uint overload and casts the result back.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int
+__clc_sub_group_broadcast(int x, uint sub_group_local_id) {
+  uint bits = __clc_sub_group_broadcast((uint)x, sub_group_local_id);
+  return (int)bits;
+}
+
+// uint overload: the base case the other 32-bit-or-narrower overloads reuse.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint
+__clc_sub_group_broadcast(uint x, uint sub_group_local_id) {
+  // The requested index goes through readfirstlane before selecting the lane.
+  uint lane = __builtin_amdgcn_readfirstlane(sub_group_local_id);
+  uint value = __builtin_amdgcn_readlane(x, lane);
+  return value;
+}
+
+// long overload: forwards to the ulong overload and casts the result back.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST long
+__clc_sub_group_broadcast(long x, uint sub_group_local_id) {
+  ulong bits = __clc_sub_group_broadcast((ulong)x, sub_group_local_id);
+  return (long)bits;
+}
+
+// ulong overload: reinterprets x as a uint2 and reads each 32-bit half from
+// the selected lane separately, then reinterprets the pair as a ulong.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong
+__clc_sub_group_broadcast(ulong x, uint sub_group_local_id) {
+  uint lane = __builtin_amdgcn_readfirstlane(sub_group_local_id);
+  uint2 halves = __builtin_astype(x, uint2);
+  uint lo = __builtin_amdgcn_readlane(halves.x, lane);
+  uint hi = __builtin_amdgcn_readlane(halves.y, lane);
+  return __builtin_astype((uint2)(lo, hi), ulong);
+}
+
+// half overload: reinterprets x as a ushort, widens it to uint for the uint
+// overload, then narrows the result and reinterprets it as a half.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST half
+__clc_sub_group_broadcast(half x, uint sub_group_local_id) {
+  ushort bitcast = __builtin_astype(x, ushort);
+  uint broadcast = __clc_sub_group_broadcast((uint)bitcast, sub_group_local_id);
+  return __builtin_astype((ushort)broadcast, half);
+}
+
+// float overload: reinterprets x as a uint, reuses the uint overload and
+// reinterprets the result as a float.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST float
+__clc_sub_group_broadcast(float x, uint sub_group_local_id) {
+  uint bits = __builtin_astype(x, uint);
+  uint broadcast = __clc_sub_group_broadcast(bits, sub_group_local_id);
+  return __builtin_astype(broadcast, float);
+}
+
+// double overload: reinterprets x as a ulong, reuses the ulong overload and
+// reinterprets the result as a double.
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST double
+__clc_sub_group_broadcast(double x, uint sub_group_local_id) {
+  // Must be a bit-for-bit reinterpretation into a 64-bit integer: a value
+  // conversion to uint would drop the bit pattern and pick the uint overload.
+  ulong bitcast = __builtin_astype(x, ulong);
+  ulong broadcast = __clc_sub_group_broadcast(bitcast, sub_group_local_id);
+  return __builtin_astype(broadcast, double);
+}
+
+// Include the scalarize body through the integer and math gentype headers.
+// That body only emits code when __CLC_VECSIZE_OR_1 >= 2, i.e. for vectors.
+#define __CLC_FUNCTION __clc_sub_group_broadcast
+// NOTE(review): the scalarize body spells out uint and does not reference
+// __CLC_ARG2_TYPE -- confirm whether this define is still needed.
+#define __CLC_ARG2_TYPE uint
+
+#define __CLC_BODY <clc/subgroup/clc_subgroup_broadcast_scalarize.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/subgroup/clc_subgroup_broadcast_scalarize.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES
index 43f70cf37a377..312657f3bf106 100644
--- a/libclc/opencl/lib/generic/SOURCES
+++ b/libclc/opencl/lib/generic/SOURCES
@@ -200,6 +200,7 @@ shared/max.cl
 shared/min.cl
 shared/vload.cl
 shared/vstore.cl
+subgroup/sub_group_broadcast.cl
 synchronization/work_group_barrier.cl
 workitem/get_enqueued_local_size.cl
 workitem/get_global_id.cl
diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.cl b/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.cl
new file mode 100644
index 0000000000000..a2e4fae6795cc
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/sub_group_broadcast.h"
+
+// Include sub_group_broadcast.inc (the sub_group_broadcast wrapper) through
+// the integer gentype header...
+#define __CLC_BODY <sub_group_broadcast.inc>
+#include <clc/integer/gentype.inc>
+
+// ...and again through the math gentype header.
+#define __CLC_BODY <sub_group_broadcast.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.inc b/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.inc
new file mode 100644
index 0000000000000..7fa1c5cd27d14
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_broadcast.inc
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(cl_khr_subgroup_extended_types) || defined(__CLC_SCALAR)
+
+// sub_group_broadcast for the current __CLC_GENTYPE: forwards both arguments
+// to __clc_sub_group_broadcast. Emitted when __CLC_SCALAR is defined, or for
+// any type when cl_khr_subgroup_extended_types is defined.
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE
+sub_group_broadcast(__CLC_GENTYPE x, uint sub_group_local_id) {
+  __CLC_GENTYPE result = __clc_sub_group_broadcast(x, sub_group_local_id);
+  return result;
+}
+
+#endif

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to