llvmbot wrote:

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-clang

Author: Steffen Larsen (steffenlarsen)

<details>
<summary>Changes</summary>

In the ABIInfo implementations for both the SPIRV and AMDGPU targets, the 
lowering of arguments too large to fit into registers is currently prone to 
integer overflow when computing the number of registers an argument needs. 
Arguments so large that their register count exceeds what an `unsigned` can 
represent then appear to fit into the available registers. To avoid this, the 
function that determines the required number of registers is changed to return 
a 64-bit unsigned integer value instead.
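
For illustration, a minimal standalone sketch of the failure mode, using a 
hypothetical bit-size rather than anything taken from the patch:

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical bit-size of a huge by-value argument: 2^40 bits.
  uint64_t SizeInBits = uint64_t{1} << 40;

  // Pre-patch pattern: the register count is stored in 32 bits. The true
  // count, (2^40 + 31) / 32 = 2^35, truncates on assignment (here to exactly
  // 0, since 2^35 is a multiple of 2^32), so the argument appears to fit.
  unsigned NarrowNumRegs = (SizeInBits + 31) / 32;

  // Post-patch pattern: the full 64-bit count survives.
  uint64_t WideNumRegs = (SizeInBits + 31) / 32;

  std::cout << NarrowNumRegs << '\n'; // 0
  std::cout << WideNumRegs << '\n';   // 34359738368
}
```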

Note that the SPIR-V target currently trips the verifier due to a check that 
arguments passed by value don't exceed the representable size. This also 
affects other targets, such as x86, and is outside the scope of these changes. 
See https://github.com/llvm/llvm-project/issues/118207.

---
Full diff: https://github.com/llvm/llvm-project/pull/176921.diff


3 Files Affected:

- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+9-9) 
- (modified) clang/lib/CodeGen/Targets/SPIR.cpp (+8-8) 
- (added) clang/test/CodeGenHIP/device-function-huge-byval-arg.hip (+17) 


``````````diff
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4bc9557b26b52..8c1e8c58e67ef 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -24,7 +24,7 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
 private:
   static const unsigned MaxNumRegsForArgsRet = 16;
 
-  unsigned numRegsForType(QualType Ty) const;
+  uint64_t numRegsForType(QualType Ty) const;
 
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Base,
@@ -78,20 +78,20 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
 }
 
 /// Estimate number of registers the type will use when passed in registers.
-unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
-  unsigned NumRegs = 0;
+uint64_t AMDGPUABIInfo::numRegsForType(QualType Ty) const {
+  uint64_t NumRegs = 0;
 
   if (const VectorType *VT = Ty->getAs<VectorType>()) {
     // Compute from the number of elements. The reported size is based on the
     // in-memory size, which includes the padding 4th element for 3-vectors.
     QualType EltTy = VT->getElementType();
-    unsigned EltSize = getContext().getTypeSize(EltTy);
+    uint64_t EltSize = getContext().getTypeSize(EltTy);
 
     // 16-bit element vectors should be passed as packed.
     if (EltSize == 16)
       return (VT->getNumElements() + 1) / 2;
 
-    unsigned EltNumRegs = (EltSize + 31) / 32;
+    uint64_t EltNumRegs = (EltSize + 31) / 32;
     return EltNumRegs * VT->getNumElements();
   }
 
@@ -247,7 +247,7 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
       return DefaultABIInfo::classifyArgumentType(Ty);
 
     // Pack aggregates <= 8 bytes into single VGPR or pair.
-    unsigned Size = getContext().getTypeSize(Ty);
+    uint64_t Size = getContext().getTypeSize(Ty);
     if (Size <= 64) {
       unsigned NumRegs = (Size + 31) / 32;
       NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
@@ -264,7 +264,7 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
     }
 
     if (NumRegsLeft > 0) {
-      unsigned NumRegs = numRegsForType(Ty);
+      uint64_t NumRegs = numRegsForType(Ty);
       if (NumRegsLeft >= NumRegs) {
         NumRegsLeft -= NumRegs;
         return ABIArgInfo::getDirect();
@@ -281,8 +281,8 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
   // Otherwise just do the default thing.
   ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
   if (!ArgInfo.isIndirect()) {
-    unsigned NumRegs = numRegsForType(Ty);
-    NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
+    uint64_t NumRegs = numRegsForType(Ty);
+    NumRegsLeft -= std::min(NumRegs, uint64_t{NumRegsLeft});
   }
 
   return ArgInfo;
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index ba90ab3e67053..61ea677292492 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -49,7 +49,7 @@ class AMDGCNSPIRVABIInfo : public SPIRVABIInfo {
   static constexpr unsigned MaxNumRegsForArgsRet = 16; // 16 32-bit registers
   mutable unsigned NumRegsLeft = 0;
 
-  unsigned numRegsForType(QualType Ty) const;
+  uint64_t numRegsForType(QualType Ty) const;
 
   bool isHomogeneousAggregateBaseType(QualType Ty) const override {
     return true;
@@ -234,21 +234,21 @@ RValue SPIRVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                           /*AllowHigherAlign=*/true, Slot);
 }
 
-unsigned AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const {
+uint64_t AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const {
   // This duplicates the AMDGPUABI computation.
-  unsigned NumRegs = 0;
+  uint64_t NumRegs = 0;
 
   if (const VectorType *VT = Ty->getAs<VectorType>()) {
     // Compute from the number of elements. The reported size is based on the
     // in-memory size, which includes the padding 4th element for 3-vectors.
     QualType EltTy = VT->getElementType();
-    unsigned EltSize = getContext().getTypeSize(EltTy);
+    uint64_t EltSize = getContext().getTypeSize(EltTy);
 
     // 16-bit element vectors should be passed as packed.
     if (EltSize == 16)
       return (VT->getNumElements() + 1) / 2;
 
-    unsigned EltNumRegs = (EltSize + 31) / 32;
+    uint64_t EltNumRegs = (EltSize + 31) / 32;
     return EltNumRegs * VT->getNumElements();
   }
 
@@ -355,8 +355,8 @@ ABIArgInfo AMDGCNSPIRVABIInfo::classifyArgumentType(QualType Ty) const {
   if (!isAggregateTypeForABI(Ty)) {
     ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
     if (!ArgInfo.isIndirect()) {
-      unsigned NumRegs = numRegsForType(Ty);
-      NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
+      uint64_t NumRegs = numRegsForType(Ty);
+      NumRegsLeft -= std::min(NumRegs, uint64_t{NumRegsLeft});
     }
 
     return ArgInfo;
@@ -401,7 +401,7 @@ ABIArgInfo AMDGCNSPIRVABIInfo::classifyArgumentType(QualType Ty) const {
   }
 
   if (NumRegsLeft > 0) {
-    unsigned NumRegs = numRegsForType(Ty);
+    uint64_t NumRegs = numRegsForType(Ty);
     if (NumRegsLeft >= NumRegs) {
       NumRegsLeft -= NumRegs;
       return ABIArgInfo::getDirect();
diff --git a/clang/test/CodeGenHIP/device-function-huge-byval-arg.hip b/clang/test/CodeGenHIP/device-function-huge-byval-arg.hip
new file mode 100644
index 0000000000000..adc2a0af738a9
--- /dev/null
+++ b/clang/test/CodeGenHIP/device-function-huge-byval-arg.hip
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -disable-llvm-verifier -o - %s | FileCheck %s --check-prefix=CHECK-AMDGCNSPIRV
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s --check-prefix=CHECK-AMDGCN
+
+// NOTE: The verifier is currently disabled for the spirv64 case as it complains
+//       about the 'byref' arguments being too large. This is currently a
+//       problem for all targets that lower large arguments to 'byref'
+//       arguments.
+
+#define __device__ __attribute__((device))
+
+typedef struct {
+  long data[6871947673600];
+} huge_struct;
+
+// CHECK-AMDGCNSPIRV: @_Z9printBits11huge_struct(ptr noundef byref(%struct.huge_struct)
+// CHECK-AMDGCN: @_Z9printBits11huge_struct(i16
+__device__ void printBits(huge_struct X) {}

``````````
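
As a quick sanity check on the test's numbers (a standalone sketch, assuming a 
64-bit device `long`; not part of the patch): the true register count for 
`huge_struct` is a multiple of 2^32, so a 32-bit computation wraps to exactly 0 
and the argument looks like it needs no registers at all.

```cpp
#include <cstdint>

// Worked numbers for the test's huge_struct, assuming a 64-bit device 'long'.
constexpr uint64_t NumElts = 6871947673600;          // 25 * 2^38 elements
constexpr uint64_t SizeInBits = NumElts * 64;        // 25 * 2^44 bits
constexpr uint64_t NumRegs = (SizeInBits + 31) / 32; // 25 * 2^39 registers

// The true count exceeds what 32 bits can hold...
static_assert(NumRegs > UINT32_MAX, "register count needs 64 bits");
// ...and, being a multiple of 2^32, it truncates to exactly 0 in 32 bits.
static_assert(static_cast<uint32_t>(NumRegs) == 0, "wraps to zero");
```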

</details>


https://github.com/llvm/llvm-project/pull/176921