https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/185006

Assume v5 ABI, and move handling into clc.

>From 95c0ce82ce5d1d3accdefb0a32deebe9594af468 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Fri, 6 Mar 2026 14:32:47 +0100
Subject: [PATCH] libclc: Reimplement amdhsa get_num_groups

Assume v5 ABI, and move handling into clc.
---
 libclc/clc/lib/amdgcn/SOURCES                 |  1 +
 .../amdgcn/workitem/clc_get_num_groups.cl}    | 19 ++++++++--------
 libclc/opencl/lib/amdgcn-amdhsa/SOURCES       |  1 -
 libclc/opencl/lib/amdgcn/SOURCES              |  1 -
 .../lib/amdgcn/workitem/get_num_groups.cl     | 22 -------------------
 libclc/opencl/lib/generic/SOURCES             |  1 +
 .../workitem/get_num_groups.cl                |  0
 libclc/opencl/lib/ptx-nvidiacl/SOURCES        |  1 -
 8 files changed, 12 insertions(+), 34 deletions(-)
 rename libclc/{opencl/lib/amdgcn-amdhsa/workitem/get_num_groups.cl => 
clc/lib/amdgcn/workitem/clc_get_num_groups.cl} (50%)
 delete mode 100644 libclc/opencl/lib/amdgcn/workitem/get_num_groups.cl
 rename libclc/opencl/lib/{ptx-nvidiacl => generic}/workitem/get_num_groups.cl 
(100%)

diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index 959e4fb48e97a..28ce5f1943825 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -10,4 +10,5 @@ workitem/clc_get_group_id.cl
 workitem/clc_get_local_id.cl
 workitem/clc_get_local_size.cl
 workitem/clc_get_max_sub_group_size.cl
+workitem/clc_get_num_groups.cl
 workitem/clc_get_work_dim.cl
diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_num_groups.cl 
b/libclc/clc/lib/amdgcn/workitem/clc_get_num_groups.cl
similarity index 50%
rename from libclc/opencl/lib/amdgcn-amdhsa/workitem/get_num_groups.cl
rename to libclc/clc/lib/amdgcn/workitem/clc_get_num_groups.cl
index 0d03689feb9ba..39912655c3c4c 100644
--- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_num_groups.cl
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_num_groups.cl
@@ -6,14 +6,15 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include <clc/opencl/opencl-base.h>
+#include "clc/workitem/clc_get_num_groups.h"
+#include <amdhsa_abi.h>
 
-_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
-  size_t global_size = get_global_size(dim);
-  size_t local_size = get_local_size(dim);
-  size_t num_groups = global_size / local_size;
-  if (global_size % local_size != 0) {
-    num_groups++;
-  }
-  return num_groups;
+_CLC_OVERLOAD _CLC_DEF size_t __clc_get_num_groups(uint dim) {
+  if (dim > 2)
+    return 1;
+
+  __constant amdhsa_implicit_kernarg_v5 *args =
+      (__constant amdhsa_implicit_kernarg_v5 *)
+          __builtin_amdgcn_implicitarg_ptr();
+  return args->block_count[dim] + (args->remainder[dim] > 0);
 }
diff --git a/libclc/opencl/lib/amdgcn-amdhsa/SOURCES 
b/libclc/opencl/lib/amdgcn-amdhsa/SOURCES
index 8224b7721b2ca..ee3a48ce2c474 100644
--- a/libclc/opencl/lib/amdgcn-amdhsa/SOURCES
+++ b/libclc/opencl/lib/amdgcn-amdhsa/SOURCES
@@ -1,3 +1,2 @@
 workitem/get_global_size.cl
 workitem/get_local_size.cl
-workitem/get_num_groups.cl
diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES
index 9b3ddf192e3dc..ac72d8a00c9d0 100644
--- a/libclc/opencl/lib/amdgcn/SOURCES
+++ b/libclc/opencl/lib/amdgcn/SOURCES
@@ -6,5 +6,4 @@ workitem/get_group_id.cl
 workitem/get_global_size.cl
 workitem/get_local_id.cl
 workitem/get_local_size.cl
-workitem/get_num_groups.cl
 workitem/get_work_dim.cl
diff --git a/libclc/opencl/lib/amdgcn/workitem/get_num_groups.cl 
b/libclc/opencl/lib/amdgcn/workitem/get_num_groups.cl
deleted file mode 100644
index 9e8dddb859064..0000000000000
--- a/libclc/opencl/lib/amdgcn/workitem/get_num_groups.cl
+++ /dev/null
@@ -1,22 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/opencl/opencl-base.h>
-
-_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
-  switch (dim) {
-  case 0:
-    return __builtin_amdgcn_grid_size_x() / 
__builtin_amdgcn_workgroup_size_x();
-  case 1:
-    return __builtin_amdgcn_grid_size_y() / 
__builtin_amdgcn_workgroup_size_y();
-  case 2:
-    return __builtin_amdgcn_grid_size_z() / 
__builtin_amdgcn_workgroup_size_z();
-  default:
-    return 1;
-  }
-}
diff --git a/libclc/opencl/lib/generic/SOURCES 
b/libclc/opencl/lib/generic/SOURCES
index 312657f3bf106..dd51d5c084d51 100644
--- a/libclc/opencl/lib/generic/SOURCES
+++ b/libclc/opencl/lib/generic/SOURCES
@@ -205,3 +205,4 @@ synchronization/work_group_barrier.cl
 workitem/get_enqueued_local_size.cl
 workitem/get_global_id.cl
 workitem/get_global_size.cl
+workitem/get_num_groups.cl
diff --git a/libclc/opencl/lib/ptx-nvidiacl/workitem/get_num_groups.cl 
b/libclc/opencl/lib/generic/workitem/get_num_groups.cl
similarity index 100%
rename from libclc/opencl/lib/ptx-nvidiacl/workitem/get_num_groups.cl
rename to libclc/opencl/lib/generic/workitem/get_num_groups.cl
diff --git a/libclc/opencl/lib/ptx-nvidiacl/SOURCES 
b/libclc/opencl/lib/ptx-nvidiacl/SOURCES
index eb64360fece7a..b8e8f64b5802a 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/SOURCES
+++ b/libclc/opencl/lib/ptx-nvidiacl/SOURCES
@@ -5,7 +5,6 @@ workitem/get_local_id.cl
 workitem/get_local_linear_id.cl
 workitem/get_local_size.cl
 workitem/get_max_sub_group_size.cl
-workitem/get_num_groups.cl
 workitem/get_num_sub_groups.cl
 workitem/get_sub_group_id.cl
 workitem/get_sub_group_local_id.cl

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to