https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/184845

>From 96af4c36b111c125f906a69cd3b64bf25a57d36b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Thu, 5 Mar 2026 15:53:22 +0100
Subject: [PATCH] libclc: Add amdgpu subgroup functions

---
 libclc/opencl/lib/amdgcn/SOURCES              |  2 +
 libclc/opencl/lib/amdgcn/subgroup/subgroup.cl | 60 +++++++++++++++++++
 .../synchronization/sub_group_barrier.cl      | 21 +++++++
 3 files changed, 83 insertions(+)
 create mode 100644 libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
 create mode 100644 
libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl

diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES
index 84fc4a6650c32..9b3ddf192e3dc 100644
--- a/libclc/opencl/lib/amdgcn/SOURCES
+++ b/libclc/opencl/lib/amdgcn/SOURCES
@@ -1,4 +1,6 @@
 mem_fence/fence.cl
+subgroup/subgroup.cl
+synchronization/sub_group_barrier.cl
 workitem/get_global_offset.cl
 workitem/get_group_id.cl
 workitem/get_global_size.cl
diff --git a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl 
b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
new file mode 100644
index 0000000000000..d67d84e763b4f
--- /dev/null
+++ b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
@@ -0,0 +1,60 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/opencl/opencl-base.h>
+
+static uint wavesize_log2() {
+  return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5;
+}
+
+static uint workgroup_size() {
+  return mul24((uint)get_local_size(2),
+               mul24((uint)get_local_size(1), (uint)get_local_size(0)));
+}
+
+static uint enqueued_workgroup_size() {
+  return mul24((uint)get_enqueued_local_size(2),
+               mul24((uint)get_enqueued_local_size(1),
+                     (uint)get_enqueued_local_size(0)));
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
+  uint wavesize = __builtin_amdgcn_wavefrontsize();
+  uint lid = (uint)get_local_linear_id();
+  return min(wavesize, workgroup_size() - (lid & ~(wavesize - 1)));
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
+  return min(__builtin_amdgcn_wavefrontsize(), enqueued_workgroup_size());
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
+  return (workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >>
+         wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
+  return (enqueued_workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >>
+         wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
+  return (uint)get_local_linear_id() >> wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) {
+  return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) {
+  return __builtin_amdgcn_ballot_w64(x) != 0;
+}
diff --git a/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl 
b/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl
new file mode 100644
index 0000000000000..2b57d86294ef5
--- /dev/null
+++ b/libclc/opencl/lib/amdgcn/synchronization/sub_group_barrier.cl
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/opencl/opencl-base.h>
+
+_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags,
+                                              memory_scope scope) {
+  __builtin_amdgcn_wave_barrier();
+
+  if (flags)
+    atomic_work_item_fence(flags, memory_order_acq_rel, scope);
+}
+
+_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags) {
+  sub_group_barrier(flags, memory_scope_sub_group);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to