https://github.com/wenju-he updated 
https://github.com/llvm/llvm-project/pull/189328

>From 494cb1c1b5e176e415364a25248ddeb2ddefad33 Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Mon, 30 Mar 2026 09:09:55 +0200
Subject: [PATCH 1/4] [libclc][NFC] De-duplicate subgroup workitem function
 decls and reorganize

De-duplicate since some are already declaration in include/clc/workitem.

Move subgroup workitem function implementations into individual files
to align with other workitem functions.
---
 .../clc/include/clc/subgroup/clc_subgroup.h   |  6 -----
 .../clc_get_enqueued_num_sub_groups.h         | 16 +++++++++++++
 .../clc/workitem/clc_get_max_sub_group_size.h |  2 +-
 .../clc/workitem/clc_get_num_sub_groups.h     |  2 +-
 .../clc/workitem/clc_get_sub_group_id.h       |  2 +-
 .../clc/workitem/clc_get_sub_group_local_id.h |  2 +-
 .../clc/workitem/clc_get_sub_group_size.h     |  2 +-
 libclc/clc/lib/amdgpu/CMakeLists.txt          |  2 ++
 .../lib/amdgpu/subgroup/clc_sub_group_scan.cl |  1 +
 .../clc/lib/amdgpu/subgroup/clc_subgroup.cl   | 10 --------
 .../clc_get_enqueued_num_sub_groups.cl        | 16 +++++++++++++
 .../amdgpu/workitem/clc_get_num_sub_groups.cl |  2 +-
 .../workitem/clc_get_sub_group_local_id.cl    | 13 ++++++++++
 .../amdgpu/workitem/clc_get_sub_group_size.cl |  1 +
 .../collective/clc_work_group_any_all.cl      |  3 +++
 .../collective/clc_work_group_broadcast.cl    |  1 +
 .../generic/collective/clc_work_group_scan.cl |  1 +
 .../generic/workitem/clc_get_sub_group_id.cl  |  1 +
 libclc/opencl/lib/generic/CMakeLists.txt      |  6 +++++
 .../opencl/lib/generic/subgroup/subgroup.cl   | 24 -------------------
 .../workitem/get_enqueued_num_sub_groups.cl   | 13 ++++++++++
 .../workitem/get_max_sub_group_size.cl        | 13 ++++++++++
 .../generic/workitem/get_num_sub_groups.cl    | 13 ++++++++++
 .../lib/generic/workitem/get_sub_group_id.cl  | 13 ++++++++++
 .../workitem/get_sub_group_local_id.cl        | 13 ++++++++++
 .../generic/workitem/get_sub_group_size.cl    | 13 ++++++++++
 26 files changed, 145 insertions(+), 46 deletions(-)
 create mode 100644 
libclc/clc/include/clc/workitem/clc_get_enqueued_num_sub_groups.h
 create mode 100644 
libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
 create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
 create mode 100644 
libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_size.cl

diff --git a/libclc/clc/include/clc/subgroup/clc_subgroup.h 
b/libclc/clc/include/clc/subgroup/clc_subgroup.h
index f0a2a11d48445..133ba33644120 100644
--- a/libclc/clc/include/clc/subgroup/clc_subgroup.h
+++ b/libclc/clc/include/clc/subgroup/clc_subgroup.h
@@ -11,12 +11,6 @@
 
 #include "clc/internal/clc.h"
 
-_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void);
-_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_max_sub_group_size(void);
-_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void);
-_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint 
__clc_get_enqueued_num_sub_groups(void);
-_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void);
-_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void);
 _CLC_DECL _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x);
 _CLC_DECL _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_any(int x);
 
diff --git a/libclc/clc/include/clc/workitem/clc_get_enqueued_num_sub_groups.h 
b/libclc/clc/include/clc/workitem/clc_get_enqueued_num_sub_groups.h
new file mode 100644
index 0000000000000..14afdd80ca11f
--- /dev/null
+++ b/libclc/clc/include/clc/workitem/clc_get_enqueued_num_sub_groups.h
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_WORKITEM_CLC_GET_ENQUEUED_NUM_SUB_GROUPS_H__
+#define __CLC_WORKITEM_CLC_GET_ENQUEUED_NUM_SUB_GROUPS_H__
+
+#include "clc/internal/clc.h"
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint 
__clc_get_enqueued_num_sub_groups(void);
+
+#endif // __CLC_WORKITEM_CLC_GET_ENQUEUED_NUM_SUB_GROUPS_H__
diff --git a/libclc/clc/include/clc/workitem/clc_get_max_sub_group_size.h 
b/libclc/clc/include/clc/workitem/clc_get_max_sub_group_size.h
index a5c98aeba94b8..d5a3a13945e7b 100644
--- a/libclc/clc/include/clc/workitem/clc_get_max_sub_group_size.h
+++ b/libclc/clc/include/clc/workitem/clc_get_max_sub_group_size.h
@@ -11,6 +11,6 @@
 
 #include "clc/internal/clc.h"
 
-_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_max_sub_group_size(void);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_max_sub_group_size(void);
 
 #endif // __CLC_WORKITEM_CLC_GET_MAX_SUB_GROUP_SIZE_H__
diff --git a/libclc/clc/include/clc/workitem/clc_get_num_sub_groups.h 
b/libclc/clc/include/clc/workitem/clc_get_num_sub_groups.h
index b584df98e44a6..3d1da26e8a02a 100644
--- a/libclc/clc/include/clc/workitem/clc_get_num_sub_groups.h
+++ b/libclc/clc/include/clc/workitem/clc_get_num_sub_groups.h
@@ -11,6 +11,6 @@
 
 #include "clc/internal/clc.h"
 
-_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_num_sub_groups();
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void);
 
 #endif // __CLC_WORKITEM_CLC_GET_NUM_SUB_GROUPS_H__
diff --git a/libclc/clc/include/clc/workitem/clc_get_sub_group_id.h 
b/libclc/clc/include/clc/workitem/clc_get_sub_group_id.h
index 44a4459aa48b4..b21e3c1d8df05 100644
--- a/libclc/clc/include/clc/workitem/clc_get_sub_group_id.h
+++ b/libclc/clc/include/clc/workitem/clc_get_sub_group_id.h
@@ -11,6 +11,6 @@
 
 #include "clc/internal/clc.h"
 
-_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_sub_group_id();
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void);
 
 #endif // __CLC_WORKITEM_CLC_GET_SUB_GROUP_ID_H__
diff --git a/libclc/clc/include/clc/workitem/clc_get_sub_group_local_id.h 
b/libclc/clc/include/clc/workitem/clc_get_sub_group_local_id.h
index 52e4b3f28083a..0cf8890dd46b5 100644
--- a/libclc/clc/include/clc/workitem/clc_get_sub_group_local_id.h
+++ b/libclc/clc/include/clc/workitem/clc_get_sub_group_local_id.h
@@ -11,6 +11,6 @@
 
 #include "clc/internal/clc.h"
 
-_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_sub_group_local_id();
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void);
 
 #endif // __CLC_WORKITEM_CLC_GET_SUB_GROUP_LOCAL_ID_H__
diff --git a/libclc/clc/include/clc/workitem/clc_get_sub_group_size.h 
b/libclc/clc/include/clc/workitem/clc_get_sub_group_size.h
index 4603bfdcbeb25..1dd857d16a2bf 100644
--- a/libclc/clc/include/clc/workitem/clc_get_sub_group_size.h
+++ b/libclc/clc/include/clc/workitem/clc_get_sub_group_size.h
@@ -11,6 +11,6 @@
 
 #include "clc/internal/clc.h"
 
-_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_sub_group_size();
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void);
 
 #endif // __CLC_WORKITEM_CLC_GET_SUB_GROUP_SIZE_H__
diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt 
b/libclc/clc/lib/amdgpu/CMakeLists.txt
index a5cd47fab4462..69af2ebe525ad 100644
--- a/libclc/clc/lib/amdgpu/CMakeLists.txt
+++ b/libclc/clc/lib/amdgpu/CMakeLists.txt
@@ -35,6 +35,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   synchronization/clc_sub_group_barrier.cl
   synchronization/clc_work_group_barrier.cl
   workitem/clc_get_enqueued_local_size.cl
+  workitem/clc_get_enqueued_num_sub_groups.cl
   workitem/clc_get_global_offset.cl
   workitem/clc_get_global_size.cl
   workitem/clc_get_group_id.cl
@@ -44,6 +45,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   workitem/clc_get_num_groups.cl
   workitem/clc_get_num_sub_groups.cl
   workitem/clc_get_sub_group_id.cl
+  workitem/clc_get_sub_group_local_id.cl
   workitem/clc_get_sub_group_size.cl
   workitem/clc_get_work_dim.cl)
 
diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl 
b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
index 3ef735aac2aae..573866dee1fa0 100644
--- a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
@@ -14,6 +14,7 @@
 #include "clc/subgroup/clc_sub_group_broadcast.h"
 #include "clc/subgroup/clc_sub_group_scan.h"
 #include "clc/subgroup/clc_subgroup.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
 
 #define QUAD_PERM (1 << 15)
 
diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl 
b/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl
index 71f4abc42e895..eda7ca2aff394 100644
--- a/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl
@@ -9,16 +9,6 @@
 #include "clc/amdgpu/amdgpu_utils.h"
 #include "clc/subgroup/clc_subgroup.h"
 
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) 
{
-  return (__clc_amdgpu_enqueued_workgroup_size() +
-          __builtin_amdgcn_wavefrontsize() - 1) >>
-         __clc_amdgpu_wavesize_log2();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
 _CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x) {
   return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec();
 }
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
new file mode 100644
index 0000000000000..bb702da96f0a1
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) 
{
+  return (__clc_amdgpu_enqueued_workgroup_size() +
+          __builtin_amdgcn_wavefrontsize() - 1) >>
+         __clc_amdgpu_wavesize_log2();
+}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
index cb71ef282466b..5dcd3a57b4a4c 100644
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
@@ -7,7 +7,7 @@
 
//===----------------------------------------------------------------------===//
 
 #include "clc/amdgpu/amdgpu_utils.h"
-#include "clc/subgroup/clc_subgroup.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
 
 _CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void) {
   uint group_size = __clc_amdgpu_workgroup_size();
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
new file mode 100644
index 0000000000000..2493cca0c365c
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) {
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
index 77c9f8e91d8ee..7ee264f94b0d0 100644
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
@@ -9,6 +9,7 @@
 #include "clc/amdgpu/amdgpu_utils.h"
 #include "clc/shared/clc_min.h"
 #include "clc/workitem/clc_get_local_linear_id.h"
+#include "clc/workitem/clc_get_sub_group_size.h"
 
 _CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void) {
   uint wavesize = __builtin_amdgcn_wavefrontsize();
diff --git a/libclc/clc/lib/generic/collective/clc_work_group_any_all.cl 
b/libclc/clc/lib/generic/collective/clc_work_group_any_all.cl
index 4c79ef1f73eba..33fe5f7bd7ddb 100644
--- a/libclc/clc/lib/generic/collective/clc_work_group_any_all.cl
+++ b/libclc/clc/lib/generic/collective/clc_work_group_any_all.cl
@@ -13,6 +13,9 @@
 #include "clc/collective/clc_work_group_any_all.h"
 #include "clc/subgroup/clc_subgroup.h"
 #include "clc/synchronization/clc_work_group_barrier.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
 
 #pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable
 
diff --git a/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl 
b/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl
index ebf2d2eb1710f..cdecc39725647 100644
--- a/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl
+++ b/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl
@@ -13,6 +13,7 @@
 #include "clc/subgroup/clc_subgroup.h"
 #include "clc/synchronization/clc_work_group_barrier.h"
 #include "clc/workitem/clc_get_local_id.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
 
 #pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable
 
diff --git a/libclc/clc/lib/generic/collective/clc_work_group_scan.cl 
b/libclc/clc/lib/generic/collective/clc_work_group_scan.cl
index ae333cd9b8cdf..a4d377c0be964 100644
--- a/libclc/clc/lib/generic/collective/clc_work_group_scan.cl
+++ b/libclc/clc/lib/generic/collective/clc_work_group_scan.cl
@@ -22,6 +22,7 @@
 #include "clc/workitem/clc_get_num_sub_groups.h"
 #include "clc/workitem/clc_get_sub_group_id.h"
 #include "clc/workitem/clc_get_sub_group_local_id.h"
+#include "clc/workitem/clc_get_sub_group_size.h"
 
 #pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable
 
diff --git a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl 
b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
index 02391c52ca813..67b008c312f29 100644
--- a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
+++ b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
@@ -8,6 +8,7 @@
 
 #include "clc/workitem/clc_get_local_linear_id.h"
 #include "clc/workitem/clc_get_max_sub_group_size.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
 
 _CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) {
   return __clc_get_local_linear_id() / __clc_get_max_sub_group_size();
diff --git a/libclc/opencl/lib/generic/CMakeLists.txt 
b/libclc/opencl/lib/generic/CMakeLists.txt
index 4ad60248139ae..1d0d7ddd705e8 100644
--- a/libclc/opencl/lib/generic/CMakeLists.txt
+++ b/libclc/opencl/lib/generic/CMakeLists.txt
@@ -215,6 +215,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   synchronization/sub_group_barrier.cl
   synchronization/work_group_barrier.cl
   workitem/get_enqueued_local_size.cl
+  workitem/get_enqueued_num_sub_groups.cl
   workitem/get_global_id.cl
   workitem/get_global_linear_id.cl
   workitem/get_global_offset.cl
@@ -223,7 +224,12 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   workitem/get_local_id.cl
   workitem/get_local_linear_id.cl
   workitem/get_local_size.cl
+  workitem/get_max_sub_group_size.cl
   workitem/get_num_groups.cl
+  workitem/get_num_sub_groups.cl
+  workitem/get_sub_group_id.cl
+  workitem/get_sub_group_local_id.cl
+  workitem/get_sub_group_size.cl
   workitem/get_work_dim.cl
 )
 
diff --git a/libclc/opencl/lib/generic/subgroup/subgroup.cl 
b/libclc/opencl/lib/generic/subgroup/subgroup.cl
index fd552ada4afaf..dfe9867fd0801 100644
--- a/libclc/opencl/lib/generic/subgroup/subgroup.cl
+++ b/libclc/opencl/lib/generic/subgroup/subgroup.cl
@@ -8,30 +8,6 @@
 
 #include "clc/subgroup/clc_subgroup.h"
 
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
-  return __clc_get_sub_group_size();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
-  return __clc_get_max_sub_group_size();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
-  return __clc_get_num_sub_groups();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
-  return __clc_get_enqueued_num_sub_groups();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
-  return __clc_get_sub_group_id();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
-  return __clc_get_sub_group_local_id();
-}
-
 _CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) {
   return __clc_sub_group_all(x);
 }
diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
new file mode 100644
index 0000000000000..fee3a588c2bbf
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_enqueued_num_sub_groups.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
+  return __clc_get_enqueued_num_sub_groups();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl 
b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
new file mode 100644
index 0000000000000..bbd19a88a0165
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
+  return __clc_get_max_sub_group_size();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
new file mode 100644
index 0000000000000..77163234fe54d
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_num_sub_groups.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
+  return __clc_get_num_sub_groups();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
new file mode 100644
index 0000000000000..a1ad6adb4e2cb
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
+  return __clc_get_sub_group_id();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
new file mode 100644
index 0000000000000..33164282165b3
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
+  return __clc_get_sub_group_local_id();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
new file mode 100644
index 0000000000000..62f3382b6d7df
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_size.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
+  return __clc_get_sub_group_size();
+}

>From 65357182f47a30c4d49e343409e969d9fa986175 Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Mon, 30 Mar 2026 10:31:35 +0200
Subject: [PATCH 2/4] merge subgroup workitem functions into one file

llvm-diff shows only nvptx64 libclc.bc changed. It is caused by
different optimization opportunities and pass ordering after the
subgroup helpers were merged into one .cl file.
---
 libclc/clc/lib/amdgpu/CMakeLists.txt          |  9 +---
 .../clc_get_enqueued_num_sub_groups.cl        | 16 -------
 .../workitem/clc_get_max_sub_group_size.cl    | 16 -------
 .../amdgpu/workitem/clc_get_num_sub_groups.cl | 16 -------
 .../amdgpu/workitem/clc_get_sub_group_id.cl   | 15 ------
 .../workitem/clc_get_sub_group_local_id.cl    | 13 -----
 .../amdgpu/workitem/clc_get_sub_group_size.cl | 19 --------
 .../amdgpu/workitem/clc_workitem_sub_group.cl | 48 +++++++++++++++++++
 libclc/clc/lib/generic/CMakeLists.txt         |  4 +-
 .../workitem/clc_get_num_sub_groups.cl        | 18 -------
 .../generic/workitem/clc_get_sub_group_id.cl  | 15 ------
 ...roup_size.cl => clc_workitem_sub_group.cl} | 12 +++++
 libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt    |  3 +-
 .../workitem/clc_get_max_sub_group_size.cl    | 13 -----
 .../workitem/clc_get_sub_group_local_id.cl    | 13 -----
 .../workitem/clc_workitem_sub_group.cl        | 47 ++++++++++++++++++
 libclc/opencl/lib/generic/CMakeLists.txt      |  7 +--
 .../workitem/get_enqueued_num_sub_groups.cl   | 13 -----
 .../workitem/get_max_sub_group_size.cl        | 13 -----
 .../generic/workitem/get_num_sub_groups.cl    | 13 -----
 .../lib/generic/workitem/get_sub_group_id.cl  | 13 -----
 .../workitem/get_sub_group_local_id.cl        | 13 -----
 .../generic/workitem/get_sub_group_size.cl    | 13 -----
 .../generic/workitem/workitem_sub_group.cl    | 38 +++++++++++++++
 24 files changed, 150 insertions(+), 250 deletions(-)
 delete mode 100644 
libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
 delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl
 delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
 delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl
 delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
 delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
 create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl
 delete mode 100644 libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl
 delete mode 100644 libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
 rename libclc/clc/lib/generic/workitem/{clc_get_sub_group_size.cl => 
clc_workitem_sub_group.cl} (70%)
 delete mode 100644 
libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl
 delete mode 100644 
libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl
 create mode 100644 
libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl
 delete mode 100644 
libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/workitem_sub_group.cl

diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt 
b/libclc/clc/lib/amdgpu/CMakeLists.txt
index 69af2ebe525ad..a65308cc34898 100644
--- a/libclc/clc/lib/amdgpu/CMakeLists.txt
+++ b/libclc/clc/lib/amdgpu/CMakeLists.txt
@@ -35,19 +35,14 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   synchronization/clc_sub_group_barrier.cl
   synchronization/clc_work_group_barrier.cl
   workitem/clc_get_enqueued_local_size.cl
-  workitem/clc_get_enqueued_num_sub_groups.cl
   workitem/clc_get_global_offset.cl
   workitem/clc_get_global_size.cl
   workitem/clc_get_group_id.cl
   workitem/clc_get_local_id.cl
   workitem/clc_get_local_size.cl
-  workitem/clc_get_max_sub_group_size.cl
   workitem/clc_get_num_groups.cl
-  workitem/clc_get_num_sub_groups.cl
-  workitem/clc_get_sub_group_id.cl
-  workitem/clc_get_sub_group_local_id.cl
-  workitem/clc_get_sub_group_size.cl
-  workitem/clc_get_work_dim.cl)
+  workitem/clc_get_work_dim.cl
+  workitem/clc_workitem_sub_group.cl)
 
 libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func
   math/clc_native_exp.cl
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
deleted file mode 100644
index bb702da96f0a1..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/amdgpu/amdgpu_utils.h"
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) 
{
-  return (__clc_amdgpu_enqueued_workgroup_size() +
-          __builtin_amdgcn_wavefrontsize() - 1) >>
-         __clc_amdgpu_wavesize_log2();
-}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl
deleted file mode 100644
index 5eb0166135663..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/amdgpu/amdgpu_utils.h"
-#include "clc/shared/clc_min.h"
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size(void) {
-  return __clc_min(__builtin_amdgcn_wavefrontsize(),
-                   __clc_amdgpu_enqueued_workgroup_size());
-}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
deleted file mode 100644
index 5dcd3a57b4a4c..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/amdgpu/amdgpu_utils.h"
-#include "clc/workitem/clc_get_num_sub_groups.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void) {
-  uint group_size = __clc_amdgpu_workgroup_size();
-  return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >>
-         __clc_amdgpu_wavesize_log2();
-}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl
deleted file mode 100644
index ba3baf98bda14..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl
+++ /dev/null
@@ -1,15 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/amdgpu/amdgpu_utils.h"
-#include "clc/workitem/clc_get_local_linear_id.h"
-#include "clc/workitem/clc_get_sub_group_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void) {
-  return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2();
-}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
deleted file mode 100644
index 2493cca0c365c..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
deleted file mode 100644
index 7ee264f94b0d0..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
+++ /dev/null
@@ -1,19 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/amdgpu/amdgpu_utils.h"
-#include "clc/shared/clc_min.h"
-#include "clc/workitem/clc_get_local_linear_id.h"
-#include "clc/workitem/clc_get_sub_group_size.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void) {
-  uint wavesize = __builtin_amdgcn_wavefrontsize();
-  uint lid = (uint)__clc_get_local_linear_id();
-  return __clc_min(wavesize,
-                   __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1)));
-}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl
new file mode 100644
index 0000000000000..e78e955ab5f56
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/shared/clc_min.h"
+#include "clc/workitem/clc_get_local_linear_id.h"
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+#include "clc/workitem/clc_get_sub_group_size.h"
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) 
{
+  return (__clc_amdgpu_enqueued_workgroup_size() +
+          __builtin_amdgcn_wavefrontsize() - 1) >>
+         __clc_amdgpu_wavesize_log2();
+}
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size(void) {
+  return __clc_min(__builtin_amdgcn_wavefrontsize(),
+                   __clc_amdgpu_enqueued_workgroup_size());
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_num_sub_groups(void) {
+  uint group_size = __clc_amdgpu_workgroup_size();
+  return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >>
+         __clc_amdgpu_wavesize_log2();
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_id(void) {
+  return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2();
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_local_id(void) {
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_size(void) {
+  uint wavesize = __builtin_amdgcn_wavefrontsize();
+  uint lid = (uint)__clc_get_local_linear_id();
+  return __clc_min(wavesize,
+                   __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1)));
+}
diff --git a/libclc/clc/lib/generic/CMakeLists.txt 
b/libclc/clc/lib/generic/CMakeLists.txt
index 168a0f1ff1e84..0f2f46ccdf3c6 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -204,9 +204,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   workitem/clc_get_global_id.cl
   workitem/clc_get_global_linear_id.cl
   workitem/clc_get_local_linear_id.cl
-  workitem/clc_get_num_sub_groups.cl
-  workitem/clc_get_sub_group_id.cl
-  workitem/clc_get_sub_group_size.cl
+  workitem/clc_workitem_sub_group.cl
 )
 
 libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func
diff --git a/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl 
b/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl
deleted file mode 100644
index 7d6d922d52bc4..0000000000000
--- a/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_local_size.h"
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-#include "clc/workitem/clc_get_num_sub_groups.h"
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() {
-  size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) *
-                       __clc_get_local_size(2);
-  uint sg_size = __clc_get_max_sub_group_size();
-  return (uint)((linear_size + sg_size - 1) / sg_size);
-}
diff --git a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl 
b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
deleted file mode 100644
index 67b008c312f29..0000000000000
--- a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
+++ /dev/null
@@ -1,15 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_local_linear_id.h"
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-#include "clc/workitem/clc_get_sub_group_id.h"
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) {
-  return __clc_get_local_linear_id() / __clc_get_max_sub_group_size();
-}
diff --git a/libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl 
b/libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl
similarity index 70%
rename from libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl
rename to libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl
index 7944486aac0f0..c9638c59e2877 100644
--- a/libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl
+++ b/libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl
@@ -6,12 +6,24 @@
 //
 
//===----------------------------------------------------------------------===//
 
+#include "clc/workitem/clc_get_local_linear_id.h"
 #include "clc/workitem/clc_get_local_size.h"
 #include "clc/workitem/clc_get_max_sub_group_size.h"
 #include "clc/workitem/clc_get_num_sub_groups.h"
 #include "clc/workitem/clc_get_sub_group_id.h"
 #include "clc/workitem/clc_get_sub_group_size.h"
 
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() {
+  size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) *
+                       __clc_get_local_size(2);
+  uint sg_size = __clc_get_max_sub_group_size();
+  return (uint)((linear_size + sg_size - 1) / sg_size);
+}
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) {
+  return __clc_get_local_linear_id() / __clc_get_max_sub_group_size();
+}
+
 _CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() {
   if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) {
     return __clc_get_max_sub_group_size();
diff --git a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt 
b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
index 6eb0baab1c0bb..6a5860027f0db 100644
--- a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
+++ b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
@@ -11,7 +11,6 @@ libclc_configure_source_list(CLC_PTX_NVIDIACL_SOURCES
   workitem/clc_get_group_id.cl
   workitem/clc_get_local_id.cl
   workitem/clc_get_local_size.cl
-  workitem/clc_get_max_sub_group_size.cl
   workitem/clc_get_num_groups.cl
-  workitem/clc_get_sub_group_local_id.cl
+  workitem/clc_workitem_sub_group.cl
 )
diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl 
b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl
deleted file mode 100644
index 9a380c2fc4b8f..0000000000000
--- a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() {
-  return __nvvm_read_ptx_sreg_warpsize();
-}
diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl 
b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl
deleted file mode 100644
index 7e61e09bff1e3..0000000000000
--- a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_local_id() {
-  return __nvvm_read_ptx_sreg_laneid();
-}
diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl 
b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl
new file mode 100644
index 0000000000000..bdc09c9f61714
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_local_linear_id.h"
+#include "clc/workitem/clc_get_local_size.h"
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+#include "clc/workitem/clc_get_sub_group_size.h"
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() {
+  return __nvvm_read_ptx_sreg_warpsize();
+}
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() {
+  size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) *
+                       __clc_get_local_size(2);
+  uint sg_size = __clc_get_max_sub_group_size();
+  return (uint)((linear_size + sg_size - 1) / sg_size);
+}
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) {
+  return __clc_get_local_linear_id() / __clc_get_max_sub_group_size();
+}
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_local_id() {
+  return __nvvm_read_ptx_sreg_laneid();
+}
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() {
+  if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) {
+    return __clc_get_max_sub_group_size();
+  }
+  size_t size_x = __clc_get_local_size(0);
+  size_t size_y = __clc_get_local_size(1);
+  size_t size_z = __clc_get_local_size(2);
+  size_t linear_size = size_z * size_y * size_x;
+  size_t uniform_groups = __clc_get_num_sub_groups() - 1;
+  size_t uniform_size = __clc_get_max_sub_group_size() * uniform_groups;
+  return linear_size - uniform_size;
+}
diff --git a/libclc/opencl/lib/generic/CMakeLists.txt 
b/libclc/opencl/lib/generic/CMakeLists.txt
index 1d0d7ddd705e8..6877b937e20c5 100644
--- a/libclc/opencl/lib/generic/CMakeLists.txt
+++ b/libclc/opencl/lib/generic/CMakeLists.txt
@@ -215,7 +215,6 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   synchronization/sub_group_barrier.cl
   synchronization/work_group_barrier.cl
   workitem/get_enqueued_local_size.cl
-  workitem/get_enqueued_num_sub_groups.cl
   workitem/get_global_id.cl
   workitem/get_global_linear_id.cl
   workitem/get_global_offset.cl
@@ -224,13 +223,9 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   workitem/get_local_id.cl
   workitem/get_local_linear_id.cl
   workitem/get_local_size.cl
-  workitem/get_max_sub_group_size.cl
   workitem/get_num_groups.cl
-  workitem/get_num_sub_groups.cl
-  workitem/get_sub_group_id.cl
-  workitem/get_sub_group_local_id.cl
-  workitem/get_sub_group_size.cl
   workitem/get_work_dim.cl
+  workitem/workitem_sub_group.cl
 )
 
 libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func
diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
deleted file mode 100644
index fee3a588c2bbf..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_enqueued_num_sub_groups.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
-  return __clc_get_enqueued_num_sub_groups();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl 
b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
deleted file mode 100644
index bbd19a88a0165..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
-  return __clc_get_max_sub_group_size();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
deleted file mode 100644
index 77163234fe54d..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_num_sub_groups.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
-  return __clc_get_num_sub_groups();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
deleted file mode 100644
index a1ad6adb4e2cb..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
-  return __clc_get_sub_group_id();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
deleted file mode 100644
index 33164282165b3..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
-  return __clc_get_sub_group_local_id();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
deleted file mode 100644
index 62f3382b6d7df..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_size.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
-  return __clc_get_sub_group_size();
-}
diff --git a/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl 
b/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl
new file mode 100644
index 0000000000000..970c35f945458
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_enqueued_num_sub_groups.h"
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+#include "clc/workitem/clc_get_sub_group_size.h"
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_enqueued_num_sub_groups(void) {
+  return __clc_get_enqueued_num_sub_groups();
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_max_sub_group_size(void) {
+  return __clc_get_max_sub_group_size();
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_num_sub_groups(void) {
+  return __clc_get_num_sub_groups();
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_id(void) {
+  return __clc_get_sub_group_id();
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_local_id(void) {
+  return __clc_get_sub_group_local_id();
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_size(void) {
+  return __clc_get_sub_group_size();
+}

>From 1d5d491d5c47198c215126ffef254f709eae8ab2 Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Mon, 30 Mar 2026 11:28:32 +0200
Subject: [PATCH 3/4] Revert "merge subgroup workitem functions into one file"

This reverts commit 65357182f47a30c4d49e343409e969d9fa986175.
---
 libclc/clc/lib/amdgpu/CMakeLists.txt          |  9 +++-
 .../clc_get_enqueued_num_sub_groups.cl        | 16 +++++++
 .../workitem/clc_get_max_sub_group_size.cl    | 16 +++++++
 .../amdgpu/workitem/clc_get_num_sub_groups.cl | 16 +++++++
 .../amdgpu/workitem/clc_get_sub_group_id.cl   | 15 ++++++
 .../workitem/clc_get_sub_group_local_id.cl    | 13 +++++
 .../amdgpu/workitem/clc_get_sub_group_size.cl | 19 ++++++++
 .../amdgpu/workitem/clc_workitem_sub_group.cl | 48 -------------------
 libclc/clc/lib/generic/CMakeLists.txt         |  4 +-
 .../workitem/clc_get_num_sub_groups.cl        | 18 +++++++
 .../generic/workitem/clc_get_sub_group_id.cl  | 15 ++++++
 ...sub_group.cl => clc_get_sub_group_size.cl} | 12 -----
 libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt    |  3 +-
 .../workitem/clc_get_max_sub_group_size.cl    | 13 +++++
 .../workitem/clc_get_sub_group_local_id.cl    | 13 +++++
 .../workitem/clc_workitem_sub_group.cl        | 47 ------------------
 libclc/opencl/lib/generic/CMakeLists.txt      |  7 ++-
 .../workitem/get_enqueued_num_sub_groups.cl   | 13 +++++
 .../workitem/get_max_sub_group_size.cl        | 13 +++++
 .../generic/workitem/get_num_sub_groups.cl    | 13 +++++
 .../lib/generic/workitem/get_sub_group_id.cl  | 13 +++++
 .../workitem/get_sub_group_local_id.cl        | 13 +++++
 .../generic/workitem/get_sub_group_size.cl    | 13 +++++
 .../generic/workitem/workitem_sub_group.cl    | 38 ---------------
 24 files changed, 250 insertions(+), 150 deletions(-)
 create mode 100644 
libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
 create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl
 create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
 create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl
 create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
 create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
 delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl
 create mode 100644 libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl
 create mode 100644 libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
 rename libclc/clc/lib/generic/workitem/{clc_workitem_sub_group.cl => 
clc_get_sub_group_size.cl} (70%)
 create mode 100644 
libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl
 create mode 100644 
libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl
 delete mode 100644 
libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl
 create mode 100644 
libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
 create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/workitem_sub_group.cl

diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt 
b/libclc/clc/lib/amdgpu/CMakeLists.txt
index a65308cc34898..69af2ebe525ad 100644
--- a/libclc/clc/lib/amdgpu/CMakeLists.txt
+++ b/libclc/clc/lib/amdgpu/CMakeLists.txt
@@ -35,14 +35,19 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   synchronization/clc_sub_group_barrier.cl
   synchronization/clc_work_group_barrier.cl
   workitem/clc_get_enqueued_local_size.cl
+  workitem/clc_get_enqueued_num_sub_groups.cl
   workitem/clc_get_global_offset.cl
   workitem/clc_get_global_size.cl
   workitem/clc_get_group_id.cl
   workitem/clc_get_local_id.cl
   workitem/clc_get_local_size.cl
+  workitem/clc_get_max_sub_group_size.cl
   workitem/clc_get_num_groups.cl
-  workitem/clc_get_work_dim.cl
-  workitem/clc_workitem_sub_group.cl)
+  workitem/clc_get_num_sub_groups.cl
+  workitem/clc_get_sub_group_id.cl
+  workitem/clc_get_sub_group_local_id.cl
+  workitem/clc_get_sub_group_size.cl
+  workitem/clc_get_work_dim.cl)
 
 libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func
   math/clc_native_exp.cl
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
new file mode 100644
index 0000000000000..bb702da96f0a1
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) 
{
+  return (__clc_amdgpu_enqueued_workgroup_size() +
+          __builtin_amdgcn_wavefrontsize() - 1) >>
+         __clc_amdgpu_wavesize_log2();
+}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl
new file mode 100644
index 0000000000000..5eb0166135663
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/shared/clc_min.h"
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size(void) {
+  return __clc_min(__builtin_amdgcn_wavefrontsize(),
+                   __clc_amdgpu_enqueued_workgroup_size());
+}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
new file mode 100644
index 0000000000000..5dcd3a57b4a4c
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void) {
+  uint group_size = __clc_amdgpu_workgroup_size();
+  return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >>
+         __clc_amdgpu_wavesize_log2();
+}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl
new file mode 100644
index 0000000000000..ba3baf98bda14
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/workitem/clc_get_local_linear_id.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void) {
+  return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2();
+}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
new file mode 100644
index 0000000000000..2493cca0c365c
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) {
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
new file mode 100644
index 0000000000000..7ee264f94b0d0
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/shared/clc_min.h"
+#include "clc/workitem/clc_get_local_linear_id.h"
+#include "clc/workitem/clc_get_sub_group_size.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void) {
+  uint wavesize = __builtin_amdgcn_wavefrontsize();
+  uint lid = (uint)__clc_get_local_linear_id();
+  return __clc_min(wavesize,
+                   __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1)));
+}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl
deleted file mode 100644
index e78e955ab5f56..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl
+++ /dev/null
@@ -1,48 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/amdgpu/amdgpu_utils.h"
-#include "clc/shared/clc_min.h"
-#include "clc/workitem/clc_get_local_linear_id.h"
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-#include "clc/workitem/clc_get_num_sub_groups.h"
-#include "clc/workitem/clc_get_sub_group_id.h"
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-#include "clc/workitem/clc_get_sub_group_size.h"
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) 
{
-  return (__clc_amdgpu_enqueued_workgroup_size() +
-          __builtin_amdgcn_wavefrontsize() - 1) >>
-         __clc_amdgpu_wavesize_log2();
-}
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size(void) {
-  return __clc_min(__builtin_amdgcn_wavefrontsize(),
-                   __clc_amdgpu_enqueued_workgroup_size());
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_num_sub_groups(void) {
-  uint group_size = __clc_amdgpu_workgroup_size();
-  return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >>
-         __clc_amdgpu_wavesize_log2();
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_id(void) {
-  return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2();
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_local_id(void) {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_size(void) {
-  uint wavesize = __builtin_amdgcn_wavefrontsize();
-  uint lid = (uint)__clc_get_local_linear_id();
-  return __clc_min(wavesize,
-                   __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1)));
-}
diff --git a/libclc/clc/lib/generic/CMakeLists.txt 
b/libclc/clc/lib/generic/CMakeLists.txt
index 0f2f46ccdf3c6..168a0f1ff1e84 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -204,7 +204,9 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   workitem/clc_get_global_id.cl
   workitem/clc_get_global_linear_id.cl
   workitem/clc_get_local_linear_id.cl
-  workitem/clc_workitem_sub_group.cl
+  workitem/clc_get_num_sub_groups.cl
+  workitem/clc_get_sub_group_id.cl
+  workitem/clc_get_sub_group_size.cl
 )
 
 libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func
diff --git a/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl 
b/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl
new file mode 100644
index 0000000000000..7d6d922d52bc4
--- /dev/null
+++ b/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_local_size.h"
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() {
+  size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) *
+                       __clc_get_local_size(2);
+  uint sg_size = __clc_get_max_sub_group_size();
+  return (uint)((linear_size + sg_size - 1) / sg_size);
+}
diff --git a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl 
b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
new file mode 100644
index 0000000000000..67b008c312f29
--- /dev/null
+++ b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_local_linear_id.h"
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) {
+  return __clc_get_local_linear_id() / __clc_get_max_sub_group_size();
+}
diff --git a/libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl 
b/libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl
similarity index 70%
rename from libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl
rename to libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl
index c9638c59e2877..7944486aac0f0 100644
--- a/libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl
+++ b/libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl
@@ -6,24 +6,12 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include "clc/workitem/clc_get_local_linear_id.h"
 #include "clc/workitem/clc_get_local_size.h"
 #include "clc/workitem/clc_get_max_sub_group_size.h"
 #include "clc/workitem/clc_get_num_sub_groups.h"
 #include "clc/workitem/clc_get_sub_group_id.h"
 #include "clc/workitem/clc_get_sub_group_size.h"
 
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() {
-  size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) *
-                       __clc_get_local_size(2);
-  uint sg_size = __clc_get_max_sub_group_size();
-  return (uint)((linear_size + sg_size - 1) / sg_size);
-}
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) {
-  return __clc_get_local_linear_id() / __clc_get_max_sub_group_size();
-}
-
 _CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() {
   if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) {
     return __clc_get_max_sub_group_size();
diff --git a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt 
b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
index 6a5860027f0db..6eb0baab1c0bb 100644
--- a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
+++ b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
@@ -11,6 +11,7 @@ libclc_configure_source_list(CLC_PTX_NVIDIACL_SOURCES
   workitem/clc_get_group_id.cl
   workitem/clc_get_local_id.cl
   workitem/clc_get_local_size.cl
+  workitem/clc_get_max_sub_group_size.cl
   workitem/clc_get_num_groups.cl
-  workitem/clc_workitem_sub_group.cl
+  workitem/clc_get_sub_group_local_id.cl
 )
diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl 
b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl
new file mode 100644
index 0000000000000..9a380c2fc4b8f
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() {
+  return __nvvm_read_ptx_sreg_warpsize();
+}
diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl 
b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl
new file mode 100644
index 0000000000000..7e61e09bff1e3
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_local_id() {
+  return __nvvm_read_ptx_sreg_laneid();
+}
diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl 
b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl
deleted file mode 100644
index bdc09c9f61714..0000000000000
--- a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl
+++ /dev/null
@@ -1,47 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_local_linear_id.h"
-#include "clc/workitem/clc_get_local_size.h"
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-#include "clc/workitem/clc_get_num_sub_groups.h"
-#include "clc/workitem/clc_get_sub_group_id.h"
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-#include "clc/workitem/clc_get_sub_group_size.h"
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() {
-  return __nvvm_read_ptx_sreg_warpsize();
-}
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() {
-  size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) *
-                       __clc_get_local_size(2);
-  uint sg_size = __clc_get_max_sub_group_size();
-  return (uint)((linear_size + sg_size - 1) / sg_size);
-}
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) {
-  return __clc_get_local_linear_id() / __clc_get_max_sub_group_size();
-}
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_local_id() {
-  return __nvvm_read_ptx_sreg_laneid();
-}
-
-_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() {
-  if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) {
-    return __clc_get_max_sub_group_size();
-  }
-  size_t size_x = __clc_get_local_size(0);
-  size_t size_y = __clc_get_local_size(1);
-  size_t size_z = __clc_get_local_size(2);
-  size_t linear_size = size_z * size_y * size_x;
-  size_t uniform_groups = __clc_get_num_sub_groups() - 1;
-  size_t uniform_size = __clc_get_max_sub_group_size() * uniform_groups;
-  return linear_size - uniform_size;
-}
diff --git a/libclc/opencl/lib/generic/CMakeLists.txt 
b/libclc/opencl/lib/generic/CMakeLists.txt
index 6877b937e20c5..1d0d7ddd705e8 100644
--- a/libclc/opencl/lib/generic/CMakeLists.txt
+++ b/libclc/opencl/lib/generic/CMakeLists.txt
@@ -215,6 +215,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   synchronization/sub_group_barrier.cl
   synchronization/work_group_barrier.cl
   workitem/get_enqueued_local_size.cl
+  workitem/get_enqueued_num_sub_groups.cl
   workitem/get_global_id.cl
   workitem/get_global_linear_id.cl
   workitem/get_global_offset.cl
@@ -223,9 +224,13 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   workitem/get_local_id.cl
   workitem/get_local_linear_id.cl
   workitem/get_local_size.cl
+  workitem/get_max_sub_group_size.cl
   workitem/get_num_groups.cl
+  workitem/get_num_sub_groups.cl
+  workitem/get_sub_group_id.cl
+  workitem/get_sub_group_local_id.cl
+  workitem/get_sub_group_size.cl
   workitem/get_work_dim.cl
-  workitem/workitem_sub_group.cl
 )
 
 libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func
diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
new file mode 100644
index 0000000000000..fee3a588c2bbf
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_enqueued_num_sub_groups.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
+  return __clc_get_enqueued_num_sub_groups();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl 
b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
new file mode 100644
index 0000000000000..bbd19a88a0165
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
+  return __clc_get_max_sub_group_size();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
new file mode 100644
index 0000000000000..77163234fe54d
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_num_sub_groups.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
+  return __clc_get_num_sub_groups();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
new file mode 100644
index 0000000000000..a1ad6adb4e2cb
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
+  return __clc_get_sub_group_id();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
new file mode 100644
index 0000000000000..33164282165b3
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
+  return __clc_get_sub_group_local_id();
+}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
new file mode 100644
index 0000000000000..62f3382b6d7df
--- /dev/null
+++ b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_sub_group_size.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
+  return __clc_get_sub_group_size();
+}
diff --git a/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl 
b/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl
deleted file mode 100644
index 970c35f945458..0000000000000
--- a/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl
+++ /dev/null
@@ -1,38 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_enqueued_num_sub_groups.h"
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-#include "clc/workitem/clc_get_num_sub_groups.h"
-#include "clc/workitem/clc_get_sub_group_id.h"
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-#include "clc/workitem/clc_get_sub_group_size.h"
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_enqueued_num_sub_groups(void) {
-  return __clc_get_enqueued_num_sub_groups();
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_max_sub_group_size(void) {
-  return __clc_get_max_sub_group_size();
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_num_sub_groups(void) {
-  return __clc_get_num_sub_groups();
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_id(void) {
-  return __clc_get_sub_group_id();
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_local_id(void) {
-  return __clc_get_sub_group_local_id();
-}
-
-_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_size(void) {
-  return __clc_get_sub_group_size();
-}

>From 2dd30336fc85c9993c817ae84dd14f1d9a7f44db Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Mon, 30 Mar 2026 11:34:53 +0200
Subject: [PATCH 4/4] partially Revert "[libclc][NFC] De-duplicate subgroup
 workitem function decls and reorganize"

Only keep header changes
---
 libclc/clc/lib/amdgpu/CMakeLists.txt          |  2 --
 .../clc/lib/amdgpu/subgroup/clc_subgroup.cl   | 10 +++++++
 .../clc_get_enqueued_num_sub_groups.cl        | 16 ----------
 .../workitem/clc_get_sub_group_local_id.cl    | 13 --------
 libclc/opencl/lib/generic/CMakeLists.txt      |  6 ----
 .../opencl/lib/generic/subgroup/subgroup.cl   | 30 +++++++++++++++++++
 .../workitem/get_enqueued_num_sub_groups.cl   | 13 --------
 .../workitem/get_max_sub_group_size.cl        | 13 --------
 .../generic/workitem/get_num_sub_groups.cl    | 13 --------
 .../lib/generic/workitem/get_sub_group_id.cl  | 13 --------
 .../workitem/get_sub_group_local_id.cl        | 13 --------
 .../generic/workitem/get_sub_group_size.cl    | 13 --------
 12 files changed, 40 insertions(+), 115 deletions(-)
 delete mode 100644 
libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
 delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
 delete mode 100644 
libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
 delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_size.cl

diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt 
b/libclc/clc/lib/amdgpu/CMakeLists.txt
index 69af2ebe525ad..a5cd47fab4462 100644
--- a/libclc/clc/lib/amdgpu/CMakeLists.txt
+++ b/libclc/clc/lib/amdgpu/CMakeLists.txt
@@ -35,7 +35,6 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   synchronization/clc_sub_group_barrier.cl
   synchronization/clc_work_group_barrier.cl
   workitem/clc_get_enqueued_local_size.cl
-  workitem/clc_get_enqueued_num_sub_groups.cl
   workitem/clc_get_global_offset.cl
   workitem/clc_get_global_size.cl
   workitem/clc_get_group_id.cl
@@ -45,7 +44,6 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   workitem/clc_get_num_groups.cl
   workitem/clc_get_num_sub_groups.cl
   workitem/clc_get_sub_group_id.cl
-  workitem/clc_get_sub_group_local_id.cl
   workitem/clc_get_sub_group_size.cl
   workitem/clc_get_work_dim.cl)
 
diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl 
b/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl
index eda7ca2aff394..71f4abc42e895 100644
--- a/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl
@@ -9,6 +9,16 @@
 #include "clc/amdgpu/amdgpu_utils.h"
 #include "clc/subgroup/clc_subgroup.h"
 
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) 
{
+  return (__clc_amdgpu_enqueued_workgroup_size() +
+          __builtin_amdgcn_wavefrontsize() - 1) >>
+         __clc_amdgpu_wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) {
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
+
 _CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x) {
   return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec();
 }
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
deleted file mode 100644
index bb702da96f0a1..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/amdgpu/amdgpu_utils.h"
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) 
{
-  return (__clc_amdgpu_enqueued_workgroup_size() +
-          __builtin_amdgcn_wavefrontsize() - 1) >>
-         __clc_amdgpu_wavesize_log2();
-}
diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl 
b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
deleted file mode 100644
index 2493cca0c365c..0000000000000
--- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
diff --git a/libclc/opencl/lib/generic/CMakeLists.txt 
b/libclc/opencl/lib/generic/CMakeLists.txt
index 1d0d7ddd705e8..4ad60248139ae 100644
--- a/libclc/opencl/lib/generic/CMakeLists.txt
+++ b/libclc/opencl/lib/generic/CMakeLists.txt
@@ -215,7 +215,6 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   synchronization/sub_group_barrier.cl
   synchronization/work_group_barrier.cl
   workitem/get_enqueued_local_size.cl
-  workitem/get_enqueued_num_sub_groups.cl
   workitem/get_global_id.cl
   workitem/get_global_linear_id.cl
   workitem/get_global_offset.cl
@@ -224,12 +223,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   workitem/get_local_id.cl
   workitem/get_local_linear_id.cl
   workitem/get_local_size.cl
-  workitem/get_max_sub_group_size.cl
   workitem/get_num_groups.cl
-  workitem/get_num_sub_groups.cl
-  workitem/get_sub_group_id.cl
-  workitem/get_sub_group_local_id.cl
-  workitem/get_sub_group_size.cl
   workitem/get_work_dim.cl
 )
 
diff --git a/libclc/opencl/lib/generic/subgroup/subgroup.cl 
b/libclc/opencl/lib/generic/subgroup/subgroup.cl
index dfe9867fd0801..4fd1f04ca7189 100644
--- a/libclc/opencl/lib/generic/subgroup/subgroup.cl
+++ b/libclc/opencl/lib/generic/subgroup/subgroup.cl
@@ -7,6 +7,36 @@
 
//===----------------------------------------------------------------------===//
 
 #include "clc/subgroup/clc_subgroup.h"
+#include "clc/workitem/clc_get_enqueued_num_sub_groups.h"
+#include "clc/workitem/clc_get_max_sub_group_size.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+#include "clc/workitem/clc_get_sub_group_size.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
+  return __clc_get_sub_group_size();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
+  return __clc_get_max_sub_group_size();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
+  return __clc_get_num_sub_groups();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
+  return __clc_get_enqueued_num_sub_groups();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
+  return __clc_get_sub_group_id();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
+  return __clc_get_sub_group_local_id();
+}
 
 _CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) {
   return __clc_sub_group_all(x);
diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
deleted file mode 100644
index fee3a588c2bbf..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_enqueued_num_sub_groups.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
-  return __clc_get_enqueued_num_sub_groups();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl 
b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
deleted file mode 100644
index bbd19a88a0165..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_max_sub_group_size.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
-  return __clc_get_max_sub_group_size();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
deleted file mode 100644
index 77163234fe54d..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_num_sub_groups.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
-  return __clc_get_num_sub_groups();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
deleted file mode 100644
index a1ad6adb4e2cb..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
-  return __clc_get_sub_group_id();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
deleted file mode 100644
index 33164282165b3..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_local_id.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
-  return __clc_get_sub_group_local_id();
-}
diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl 
b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
deleted file mode 100644
index 62f3382b6d7df..0000000000000
--- a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clc/workitem/clc_get_sub_group_size.h"
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
-  return __clc_get_sub_group_size();
-}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to