https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/179823
>From 87db2155d053b12a12e0842aa302480167beaf71 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Wed, 4 Feb 2026 19:15:32 -0500 Subject: [PATCH 1/4] [CIR][HIP] Add Stub body emission test coverage and Fix kernelHandle storage --- clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp | 10 +++------- clang/test/CIR/CodeGenCUDA/kernel-call.cu | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp index ad5da0d11ff02..23e744f2cd5aa 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp @@ -121,9 +121,6 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf, if (cgm.getLangOpts().OffloadViaLLVM) cgm.errorNYI("CIRGenNVCUDARuntime: Offload via LLVM"); - if (cgm.getLangOpts().HIP) - cgm.errorNYI("CIRGenNVCUDARuntime: HIP Support"); - CIRGenBuilderTy &builder = cgm.getBuilder(); mlir::Location loc = fn.getLoc(); @@ -325,10 +322,9 @@ mlir::Operation *CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn, CIRGenBuilderTy &builder = cgm.getBuilder(); StringRef globalName = cgm.getMangledName( gd.getWithKernelReferenceKind(KernelReferenceKind::Kernel)); - const VarDecl *varDecl = llvm::dyn_cast_or_null<VarDecl>(gd.getDecl()); - cir::GlobalOp globalOp = - cgm.getOrCreateCIRGlobal(globalName, fn.getFunctionType().getReturnType(), - LangAS::Default, varDecl, NotForDefinition); + cir::GlobalOp globalOp = CIRGenModule::createGlobalOp( + cgm, fn.getLoc(), globalName, fn.getFunctionType().getReturnType(), + /*isConstant=*/true); globalOp->setAttr("alignment", builder.getI64IntegerAttr( cgm.getPointerAlign().getQuantity())); diff --git a/clang/test/CIR/CodeGenCUDA/kernel-call.cu b/clang/test/CIR/CodeGenCUDA/kernel-call.cu index 3e0a788a96d98..be22289c13f48 100644 --- a/clang/test/CIR/CodeGenCUDA/kernel-call.cu +++ b/clang/test/CIR/CodeGenCUDA/kernel-call.cu @@ -1,16 +1,19 @@ // Based on clang/test/CodeGenCUDA/kernel-call.cu. -// Tests device stub body emission for CUDA kernels. +// Tests device stub body emission for CUDA and HIP kernels. // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \ // RUN: -emit-cir %s -x cuda -o %t.cir // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CUDA-NEW +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fhip-new-launch-api \ +// RUN: -x hip -emit-cir %s -o %t.hip.cir +// RUN: FileCheck --input-file=%t.hip.cir %s --check-prefix=HIP-NEW + #include "Inputs/cuda.h" // TODO: Test CUDA legacy (< 9.0) when legacy stub body is implemented -// TODO: Test HIP when HIP stub body support is complete // Check that the stub function is generated with the correct name // CUDA-NEW-LABEL: cir.func {{.*}} @_Z21__device_stub__kernelif @@ -47,4 +50,11 @@ // Check cudaLaunchKernel is called with all 6 arguments: // func ptr, gridDim, blockDim, args, sharedMem, stream // CUDA-NEW: cir.call @cudaLaunchKernel({{.*}}) : (!cir.ptr<!void>, !rec_dim3, !rec_dim3, !cir.ptr<!cir.ptr<!void>>, !u64i, !cir.ptr<!rec_cudaStream>) -> !u32i +// +// HIP-NEW: cir.global constant external @_Z6kernelif = #cir.global_view<@_Z21__device_stub__kernelif> : !void +// HIP-NEW-LABEL: cir.func {{.*}} @_Z21__device_stub__kernelif +// HIP-NEW: cir.alloca !cir.ptr<!rec_hipStream>, {{.*}} ["stream"] +// HIP-NEW: cir.call @__hipPopCallConfiguration({{.*}}) : (!cir.ptr<!rec_dim3>, !cir.ptr<!rec_dim3>, !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!rec_hipStream>>) -> !s32i +// HIP-NEW: cir.get_global @_Z6kernelif : !cir.ptr<!void> +// HIP-NEW: cir.call @hipLaunchKernel({{.*}}) : (!cir.ptr<!void>, !rec_dim3, !rec_dim3, !cir.ptr<!cir.ptr<!void>>, !u64i, !cir.ptr<!rec_hipStream>) -> !u32i __global__ void kernel(int x, float y) {} >From f0b371d0c4782618eb10f86f0ea5b214456bfffa Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 6 Feb 2026 00:38:12 -0500 Subject: [PATCH 2/4] hip global storage fix and bitcast to match hipLaunchkernel definition --- clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp index 23e744f2cd5aa..451c28c3cccc1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp @@ -210,7 +210,8 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf, cir::PointerType kernelTy = cir::PointerType::get(globalOp.getSymType()); mlir::Value kernelVal = cir::GetGlobalOp::create(builder, loc, kernelTy, globalOp.getSymName()); - return kernelVal; + mlir::Value func = builder.createBitcast(kernelVal, cgm.voidPtrTy); + return func; } if (cir::FuncOp funcOp = llvm::dyn_cast_or_null<cir::FuncOp>( kernelHandles[fn.getSymName()])) { @@ -323,7 +324,7 @@ mlir::Operation *CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn, StringRef globalName = cgm.getMangledName( gd.getWithKernelReferenceKind(KernelReferenceKind::Kernel)); cir::GlobalOp globalOp = CIRGenModule::createGlobalOp( - cgm, fn.getLoc(), globalName, fn.getFunctionType().getReturnType(), + cgm, fn.getLoc(), globalName, fn.getFunctionType(), /*isConstant=*/true); globalOp->setAttr("alignment", builder.getI64IntegerAttr( >From 4c388b79f3bf2a48b1a6cdd6232f4338ad554347 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 6 Feb 2026 01:04:43 -0500 Subject: [PATCH 3/4] lit bro --- clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp | 3 +++ clang/test/CIR/CodeGenCUDA/kernel-call.cu | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp index 451c28c3cccc1..3b1087c8fe745 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp @@ -204,6 +204,9 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf, // We now either pick the function or the stub global for cuda, hip // respectively. + mlir::Value* a; + + mlir::Value kernel = [&]() -> mlir::Value { if (cir::GlobalOp globalOp = llvm::dyn_cast_or_null<cir::GlobalOp>( kernelHandles[fn.getSymName()])) { diff --git a/clang/test/CIR/CodeGenCUDA/kernel-call.cu b/clang/test/CIR/CodeGenCUDA/kernel-call.cu index be22289c13f48..384e2306b5407 100644 --- a/clang/test/CIR/CodeGenCUDA/kernel-call.cu +++ b/clang/test/CIR/CodeGenCUDA/kernel-call.cu @@ -51,10 +51,10 @@ // func ptr, gridDim, blockDim, args, sharedMem, stream // CUDA-NEW: cir.call @cudaLaunchKernel({{.*}}) : (!cir.ptr<!void>, !rec_dim3, !rec_dim3, !cir.ptr<!cir.ptr<!void>>, !u64i, !cir.ptr<!rec_cudaStream>) -> !u32i // -// HIP-NEW: cir.global constant external @_Z6kernelif = #cir.global_view<@_Z21__device_stub__kernelif> : !void +// HIP-NEW: cir.global constant external @_Z6kernelif = #cir.global_view<@_Z21__device_stub__kernelif> : !cir.func<(!s32i, !cir.float)> // HIP-NEW-LABEL: cir.func {{.*}} @_Z21__device_stub__kernelif // HIP-NEW: cir.alloca !cir.ptr<!rec_hipStream>, {{.*}} ["stream"] // HIP-NEW: cir.call @__hipPopCallConfiguration({{.*}}) : (!cir.ptr<!rec_dim3>, !cir.ptr<!rec_dim3>, !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!rec_hipStream>>) -> !s32i -// HIP-NEW: cir.get_global @_Z6kernelif : !cir.ptr<!void> +// HIP-NEW: cir.get_global @_Z6kernelif : !cir.ptr<!cir.func<(!s32i, !cir.float)>> // HIP-NEW: cir.call @hipLaunchKernel({{.*}}) : (!cir.ptr<!void>, !rec_dim3, !rec_dim3, !cir.ptr<!cir.ptr<!void>>, !u64i, !cir.ptr<!rec_hipStream>) -> !u32i __global__ void kernel(int x, float y) {} >From 5f8b9057f2eb9d18dbe5d1724a6fcc7a01edcad6 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 6 Feb 2026 01:14:03 -0500 Subject: [PATCH 4/4] fix nit --- clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp index 3b1087c8fe745..451c28c3cccc1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp @@ -204,9 +204,6 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf, // We now either pick the function or the stub global for cuda, hip // respectively. - mlir::Value* a; - - mlir::Value kernel = [&]() -> mlir::Value { if (cir::GlobalOp globalOp = llvm::dyn_cast_or_null<cir::GlobalOp>( kernelHandles[fn.getSymName()])) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
