https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/185006
Assume v5 ABI, and move handling into clc. >From 95c0ce82ce5d1d3accdefb0a32deebe9594af468 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Fri, 6 Mar 2026 14:32:47 +0100 Subject: [PATCH] libclc: Reimplement amdhsa get_num_groups Assume v5 ABI, and move handling into clc. --- libclc/clc/lib/amdgcn/SOURCES | 1 + .../amdgcn/workitem/clc_get_num_groups.cl} | 19 ++++++++-------- libclc/opencl/lib/amdgcn-amdhsa/SOURCES | 1 - libclc/opencl/lib/amdgcn/SOURCES | 1 - .../lib/amdgcn/workitem/get_num_groups.cl | 22 ------------------- libclc/opencl/lib/generic/SOURCES | 1 + .../workitem/get_num_groups.cl | 0 libclc/opencl/lib/ptx-nvidiacl/SOURCES | 1 - 8 files changed, 12 insertions(+), 34 deletions(-) rename libclc/{opencl/lib/amdgcn-amdhsa/workitem/get_num_groups.cl => clc/lib/amdgcn/workitem/clc_get_num_groups.cl} (50%) delete mode 100644 libclc/opencl/lib/amdgcn/workitem/get_num_groups.cl rename libclc/opencl/lib/{ptx-nvidiacl => generic}/workitem/get_num_groups.cl (100%) diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES index 959e4fb48e97a..28ce5f1943825 100644 --- a/libclc/clc/lib/amdgcn/SOURCES +++ b/libclc/clc/lib/amdgcn/SOURCES @@ -10,4 +10,5 @@ workitem/clc_get_group_id.cl workitem/clc_get_local_id.cl workitem/clc_get_local_size.cl workitem/clc_get_max_sub_group_size.cl +workitem/clc_get_num_groups.cl workitem/clc_get_work_dim.cl diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_num_groups.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_num_groups.cl similarity index 50% rename from libclc/opencl/lib/amdgcn-amdhsa/workitem/get_num_groups.cl rename to libclc/clc/lib/amdgcn/workitem/clc_get_num_groups.cl index 0d03689feb9ba..39912655c3c4c 100644 --- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_num_groups.cl +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_num_groups.cl @@ -6,14 +6,15 @@ // //===----------------------------------------------------------------------===// -#include <clc/opencl/opencl-base.h> +#include "clc/workitem/clc_get_num_groups.h" +#include <amdhsa_abi.h> -_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) { - size_t global_size = get_global_size(dim); - size_t local_size = get_local_size(dim); - size_t num_groups = global_size / local_size; - if (global_size % local_size != 0) { - num_groups++; - } - return num_groups; +_CLC_OVERLOAD _CLC_DEF size_t __clc_get_num_groups(uint dim) { + if (dim > 2) + return 1; + + __constant amdhsa_implicit_kernarg_v5 *args = + (__constant amdhsa_implicit_kernarg_v5 *) + __builtin_amdgcn_implicitarg_ptr(); + return args->block_count[dim] + (args->remainder[dim] > 0); } diff --git a/libclc/opencl/lib/amdgcn-amdhsa/SOURCES b/libclc/opencl/lib/amdgcn-amdhsa/SOURCES index 8224b7721b2ca..ee3a48ce2c474 100644 --- a/libclc/opencl/lib/amdgcn-amdhsa/SOURCES +++ b/libclc/opencl/lib/amdgcn-amdhsa/SOURCES @@ -1,3 +1,2 @@ workitem/get_global_size.cl workitem/get_local_size.cl -workitem/get_num_groups.cl diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES index 9b3ddf192e3dc..ac72d8a00c9d0 100644 --- a/libclc/opencl/lib/amdgcn/SOURCES +++ b/libclc/opencl/lib/amdgcn/SOURCES @@ -6,5 +6,4 @@ workitem/get_group_id.cl workitem/get_global_size.cl workitem/get_local_id.cl workitem/get_local_size.cl -workitem/get_num_groups.cl workitem/get_work_dim.cl diff --git a/libclc/opencl/lib/amdgcn/workitem/get_num_groups.cl b/libclc/opencl/lib/amdgcn/workitem/get_num_groups.cl deleted file mode 100644 index 9e8dddb859064..0000000000000 --- a/libclc/opencl/lib/amdgcn/workitem/get_num_groups.cl +++ /dev/null @@ -1,22 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/opencl/opencl-base.h> - -_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) { - switch (dim) { - case 0: - return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x(); - case 1: - return __builtin_amdgcn_grid_size_y() / __builtin_amdgcn_workgroup_size_y(); - case 2: - return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z(); - default: - return 1; - } -} diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES index 312657f3bf106..dd51d5c084d51 100644 --- a/libclc/opencl/lib/generic/SOURCES +++ b/libclc/opencl/lib/generic/SOURCES @@ -205,3 +205,4 @@ synchronization/work_group_barrier.cl workitem/get_enqueued_local_size.cl workitem/get_global_id.cl workitem/get_global_size.cl +workitem/get_num_groups.cl diff --git a/libclc/opencl/lib/ptx-nvidiacl/workitem/get_num_groups.cl b/libclc/opencl/lib/generic/workitem/get_num_groups.cl similarity index 100% rename from libclc/opencl/lib/ptx-nvidiacl/workitem/get_num_groups.cl rename to libclc/opencl/lib/generic/workitem/get_num_groups.cl diff --git a/libclc/opencl/lib/ptx-nvidiacl/SOURCES b/libclc/opencl/lib/ptx-nvidiacl/SOURCES index eb64360fece7a..b8e8f64b5802a 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/SOURCES +++ b/libclc/opencl/lib/ptx-nvidiacl/SOURCES @@ -5,7 +5,6 @@ workitem/get_local_id.cl workitem/get_local_linear_id.cl workitem/get_local_size.cl workitem/get_max_sub_group_size.cl -workitem/get_num_groups.cl workitem/get_num_sub_groups.cl workitem/get_sub_group_id.cl workitem/get_sub_group_local_id.cl _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
