https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/184885

Assume cov5 and use new ABI.

>From 16e41f7a78172c1b628ed174aebe740190358462 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Thu, 5 Mar 2026 21:40:54 +0100
Subject: [PATCH] libclc: Reimplement amdhsa dispatch size functions

Assume cov5 and use new ABI.
---
 .../amdgcn-amdhsa/workitem/get_global_size.cl | 12 ++++++----
 .../amdgcn-amdhsa/workitem/get_local_size.cl  | 24 +++++++++++--------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
index c10cdd2d02efb..f21a060849dbe 100644
--- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
+++ b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
@@ -6,11 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <amdhsa_abi.h>
 #include <clc/opencl/opencl-base.h>
 
 _CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
-  __constant uint *ptr = (__constant uint *)__builtin_amdgcn_dispatch_ptr();
-  if (dim < 3)
-    return ptr[3 + dim];
-  return 1;
+  if (dim > 2)
+    return 1;
+  __constant amdhsa_implicit_kernarg_v5 *args =
+      (__constant amdhsa_implicit_kernarg_v5 *)
+          __builtin_amdgcn_implicitarg_ptr();
+  return args->block_count[dim] * (uint)args->group_size[dim] +
+         (uint)args->remainder[dim];
 }
diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
index a95c58ca18534..ed1e17776361e 100644
--- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
+++ b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
@@ -6,17 +6,21 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <amdhsa_abi.h>
 #include <clc/opencl/opencl-base.h>
 
 _CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
-  __constant uint *ptr = (__constant uint *)__builtin_amdgcn_dispatch_ptr();
-  switch (dim) {
-  case 0:
-    return ptr[1] & 0xffffu;
-  case 1:
-    return ptr[1] >> 16;
-  case 2:
-    return ptr[2] & 0xffffu;
-  }
-  return 1;
+  if (dim > 2)
+    return 1;
+
+  __constant amdhsa_implicit_kernarg_v5 *args =
+      (__constant amdhsa_implicit_kernarg_v5 *)
+          __builtin_amdgcn_implicitarg_ptr();
+
+  uint group_ids[3] = {__builtin_amdgcn_workgroup_id_x(),
+                       __builtin_amdgcn_workgroup_id_y(),
+                       __builtin_amdgcn_workgroup_id_z()};
+
+  return group_ids[dim] < args->block_count[dim] ? (size_t)args->group_size[dim]
+                                                 : (size_t)args->remainder[dim];
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to