[clang] [AMDGPU] Pack uniform float structs into vector types (PR #195417)

via cfe-commits Wed, 06 May 2026 11:00:35 -0700

https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/195417


>From c5ffb2e73bcf69513f94d8e7b89e8372d0d280b2 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Fri, 6 Mar 2026 23:56:34 +0530
Subject: [PATCH 1/7] Create amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 68c200f848058ab22b3d25ce810f1639eac50556 Mon Sep 17 00:00:00 2001
From: addmisol <[email protected]>
Date: Fri, 6 Mar 2026 23:57:11 +0530
Subject: [PATCH 2/7] Delete
 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 -------------------
 1 file changed, 71 deletions(-)
 delete mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
deleted file mode 100644
index 2399630ff797b..0000000000000
--- a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
-
-typedef struct fp_int_pair {
-    float f;
-    int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
-    return x;
-}
-
-typedef struct int_fp_pair {
-    int i;
-    float f;
-} int_fp_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
-    return x;
-}
-
-typedef struct two_floats {
-    float a;
-    float b;
-} two_floats;
-
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
-    return x;
-}
-
-typedef struct two_ints {
-    int a;
-    int b;
-} two_ints;
-
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
-    return x;
-}
-
-// Structs <= 32 bits should still be coerced to i32 for return value
-typedef struct small_struct {
-    short a;
-    short b;
-} small_struct;
-
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
-small_struct return_small_struct(small_struct x) {
-    return x;
-}
-
-// Structs <= 16 bits should still be coerced to i16 for return value
-typedef struct tiny_struct {
-    char a;
-    char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
-tiny_struct return_tiny_struct(tiny_struct x) {
-    return x;
-}

>From a0240d7c483356ac87cae6718f0467e9c6781081 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sat, 2 May 2026 11:16:00 +0530
Subject: [PATCH 3/7] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 98 ++++++++++++++++++++++++----
 1 file changed, 85 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index a4b6a5fa5d35b..6fb50b47e532a 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -8,7 +8,6 @@
 
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
-#include "clang/AST/DeclCXX.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/AMDGPUAddrSpace.h"
 
@@ -78,6 +77,67 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
   return Members * NumRegs <= MaxNumRegsForArgsRet;
 }
 
+/// Check if struct contains only identical float types that can be packed
+/// into a vector (e.g., {half, half} -> <2 x half>, {float, float} -> <2 x float>).
+/// Returns the vector type if packable, nullptr otherwise.
+static llvm::Type *getPackableHomogeneousFloatVectorType(const RecordDecl *RD,
+                                                          const ASTContext &Context,
+                                                          llvm::LLVMContext &VMContext) {
+  QualType FirstFloatTy;
+  unsigned Count = 0;
+
+  for (const FieldDecl *Field : RD->fields()) {
+    // No bitfields in float vector packing
+    if (Field->isBitField())
+      return nullptr;
+
+    QualType FieldTy = Field->getType();
+
+    // Must be a floating-point type
+    if (!FieldTy->isFloatingType())
+      return nullptr;
+
+    // All fields must be the same type
+    if (FirstFloatTy.isNull()) {
+      FirstFloatTy = FieldTy;
+    } else if (!Context.hasSameType(FirstFloatTy, FieldTy)) {
+      return nullptr; // Mixed float types like {half, float}
+    }
+
+    Count++;
+  }
+
+  // Only pack 2 or 4 elements (common vector sizes)
+  if (Count != 2 && Count != 4)
+    return nullptr;
+
+  // Convert QualType to LLVM Type
+  llvm::Type *EltTy = nullptr;
+  const BuiltinType *BT = FirstFloatTy->getAs<BuiltinType>();
+  if (!BT)
+    return nullptr;
+
+  switch (BT->getKind()) {
+  case BuiltinType::Half:
+  case BuiltinType::Float16:
+    EltTy = llvm::Type::getHalfTy(VMContext);
+    break;
+  case BuiltinType::BFloat16:
+    EltTy = llvm::Type::getBFloatTy(VMContext);
+    break;
+  case BuiltinType::Float:
+    EltTy = llvm::Type::getFloatTy(VMContext);
+    break;
+  case BuiltinType::Double:
+    EltTy = llvm::Type::getDoubleTy(VMContext);
+    break;
+  default:
+    return nullptr;
+  }
+
+  return llvm::FixedVectorType::get(EltTy, Count);
+}
+
 /// Check if all fields in an aggregate type contain only sub-32-bit integer
 /// types. Such aggregates should be packed into i32 registers rather than
 /// passed as individual elements. Aggregates containing floats or full-sized
@@ -218,6 +278,17 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
       uint64_t Size = getContext().getTypeSize(RetTy);
       if (Size <= 64) {
         const RecordDecl *RD = RetTy->getAsRecordDecl();
+
+        // First, try to pack uniform float structs into vectors
+        // e.g., {half, half} -> <2 x half>, {float, float} -> <2 x float>
+        if (RD) {
+          if (llvm::Type *VecTy = getPackableHomogeneousFloatVectorType(
+                  RD, getContext(), getVMContext())) {
+            return ABIArgInfo::getDirect(VecTy);
+          }
+        }
+
+        // Then, check for packable integer types
         bool ShouldPackToInt =
             RD && containsOnlyPackableIntegerTypes(RD, getContext());
 
@@ -319,6 +390,19 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
     uint64_t Size = getContext().getTypeSize(Ty);
     if (Size <= 64) {
       const RecordDecl *RD = Ty->getAsRecordDecl();
+
+      // First, try to pack uniform float structs into vectors
+      // e.g., {half, half} -> <2 x half>, {float, float} -> <2 x float>
+      if (RD) {
+        if (llvm::Type *VecTy = getPackableHomogeneousFloatVectorType(
+                RD, getContext(), getVMContext())) {
+          unsigned NumRegs = (Size + 31) / 32;
+          NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+          return ABIArgInfo::getDirect(VecTy);
+        }
+      }
+
+      // Then, check for packable integer types
       bool ShouldPackToInt =
           RD && containsOnlyPackableIntegerTypes(RD, getContext());
 
@@ -382,9 +466,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
     return getLangASFromTargetAS(
         getABIInfo().getDataLayout().getAllocaAddrSpace());
   }
-
-  LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const override;
-
   LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                   const VarDecl *D) const override;
   StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope,
@@ -544,15 +625,6 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
       llvm::ConstantPointerNull::get(NPT), PT);
 }
 
-LangAS
-AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const {
-  // Types with no viable copy/move must be constructed in-place , use the
-  // default AS so the sret pointer matches the "this" convention.
-  if (RD && !RD->canPassInRegisters())
-    return LangAS::Default;
-  return getASTAllocaAddressSpace();
-}
-
 LangAS
 AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                   const VarDecl *D) const {

>From 7a525322360080a1c61ebd5f166f21ecc1e3f32d Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sat, 2 May 2026 11:23:09 +0530
Subject: [PATCH 4/7] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 6fb50b47e532a..f0233505c41b4 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -8,6 +8,7 @@
 
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
+#include "clang/AST/DeclCXX.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/AMDGPUAddrSpace.h"
 
@@ -466,6 +467,9 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
     return getLangASFromTargetAS(
         getABIInfo().getDataLayout().getAllocaAddrSpace());
   }
+
+  LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const override;
+
   LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                   const VarDecl *D) const override;
   StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope,
@@ -625,6 +629,15 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
       llvm::ConstantPointerNull::get(NPT), PT);
 }
 
+LangAS
+AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const {
+  // Types with no viable copy/move must be constructed in-place , use the
+  // default AS so the sret pointer matches the "this" convention.
+  if (RD && !RD->canPassInRegisters())
+    return LangAS::Default;
+  return getASTAllocaAddressSpace();
+}
+
 LangAS
 AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                   const VarDecl *D) const {

>From 628a45dfb6eea77c3232628a86f026b3b6f46ed1 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sat, 2 May 2026 11:24:29 +0530
Subject: [PATCH 5/7] Update amdgpu-abi-struct-coerce.c

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 2a1ebf0437f61..1092e06f97c1e 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -38,8 +38,8 @@ typedef struct two_floats {
     float b;
 } two_floats;
 
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
+// Two floats can be packed into <2 x float> vector
+// CHECK-LABEL: define{{.*}} <2 x float> @return_two_floats(<2 x float> %x.coerce)
 two_floats return_two_floats(two_floats x) {
     return x;
 }
@@ -517,8 +517,8 @@ typedef struct half_struct {
 } half_struct;
 
 // Two halfs = 32 bits, but floats - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.half_struct @return_half_struct(half %x.coerce0, half %x.coerce1)
-// CHECK: ret %struct.half_struct
+// Two halfs = 32 bits - can be packed into <2 x half> vector
+// CHECK-LABEL: define{{.*}} <2 x half> @return_half_struct(<2 x half> %x.coerce)
 half_struct return_half_struct(half_struct x) {
     return x;
 }
@@ -540,8 +540,8 @@ typedef struct four_halfs {
 } four_halfs;
 
 // Four halfs = 64 bits - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.four_halfs @return_four_halfs(half %x.coerce0, half %x.coerce1, half %x.coerce2, half %x.coerce3)
-// CHECK: ret %struct.four_halfs
+// Four halfs = 64 bits - can be packed into <4 x half> vector
+// CHECK-LABEL: define{{.*}} <4 x half> @return_four_halfs(<4 x half> %x.coerce)
 four_halfs return_four_halfs(four_halfs x) {
     return x;
 }
@@ -554,8 +554,8 @@ typedef struct bfloat_struct {
 } bfloat_struct;
 
 // Two bfloats = 32 bits, but floats - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.bfloat_struct @return_bfloat_struct(bfloat %x.coerce0, bfloat %x.coerce1)
-// CHECK: ret %struct.bfloat_struct
+// Two bfloats = 32 bits - can be packed into <2 x bfloat> vector
+// CHECK-LABEL: define{{.*}} <2 x bfloat> @return_bfloat_struct(<2 x bfloat> %x.coerce)
 bfloat_struct return_bfloat_struct(bfloat_struct x) {
     return x;
 }
@@ -577,8 +577,8 @@ typedef struct four_bfloats {
 } four_bfloats;
 
 // Four bfloats = 64 bits - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.four_bfloats @return_four_bfloats(bfloat %x.coerce0, bfloat %x.coerce1, bfloat %x.coerce2, bfloat %x.coerce3)
-// CHECK: ret %struct.four_bfloats
+// Four bfloats = 64 bits - can be packed into <4 x bfloat> vector
+// CHECK-LABEL: define{{.*}} <4 x bfloat> @return_four_bfloats(<4 x bfloat> %x.coerce)
 four_bfloats return_four_bfloats(four_bfloats x) {
     return x;
 }

>From 87454a760197d9e6e4bc2fb0d2698203476444e3 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Sat, 2 May 2026 13:17:39 +0530
Subject: [PATCH 6/7] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index f0233505c41b4..9ccd1d452ca72 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -79,11 +79,12 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
 }
 
 /// Check if struct contains only identical float types that can be packed
-/// into a vector (e.g., {half, half} -> <2 x half>, {float, float} -> <2 x float>).
-/// Returns the vector type if packable, nullptr otherwise.
-static llvm::Type *getPackableHomogeneousFloatVectorType(const RecordDecl *RD,
-                                                          const ASTContext &Context,
-                                                          llvm::LLVMContext &VMContext) {
+/// into a vector (e.g., {half, half} -> <2 x half>, {float, float} -> <2 x
+/// float>). Returns the vector type if packable, nullptr otherwise.
+static llvm::Type *
+getPackableHomogeneousFloatVectorType(const RecordDecl *RD,
+                                      const ASTContext &Context,
+                                      llvm::LLVMContext &VMContext) {
   QualType FirstFloatTy;
   unsigned Count = 0;
 

>From 2804437ed5a1a07648d9c577d14471b682ee0866 Mon Sep 17 00:00:00 2001
From: Addmisol <[email protected]>
Date: Wed, 6 May 2026 23:30:17 +0530
Subject: [PATCH 7/7] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 54 ++++++++--------------------
 1 file changed, 15 insertions(+), 39 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 9ccd1d452ca72..dc8835e9fe03f 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -42,8 +42,7 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
   }
 
 public:
-  explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
-    DefaultABIInfo(CGT) {}
+  explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
@@ -70,8 +69,8 @@ bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   return true;
 }
 
-bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
-  const Type *Base, uint64_t Members) const {
+bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                      uint64_t Members) const {
   uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
 
   // Homogeneous Aggregates may occupy at most 16 registers.
@@ -81,10 +80,8 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
 /// Check if struct contains only identical float types that can be packed
 /// into a vector (e.g., {half, half} -> <2 x half>, {float, float} -> <2 x
 /// float>). Returns the vector type if packable, nullptr otherwise.
-static llvm::Type *
-getPackableHomogeneousFloatVectorType(const RecordDecl *RD,
-                                      const ASTContext &Context,
-                                      llvm::LLVMContext &VMContext) {
+static llvm::Type *getPackableHomogeneousFloatVectorType(
+    const RecordDecl *RD, const ASTContext &Context, CodeGenTypes &CGT) {
   QualType FirstFloatTy;
   unsigned Count = 0;
 
@@ -113,30 +110,7 @@ getPackableHomogeneousFloatVectorType(const RecordDecl *RD,
   if (Count != 2 && Count != 4)
     return nullptr;
 
-  // Convert QualType to LLVM Type
-  llvm::Type *EltTy = nullptr;
-  const BuiltinType *BT = FirstFloatTy->getAs<BuiltinType>();
-  if (!BT)
-    return nullptr;
-
-  switch (BT->getKind()) {
-  case BuiltinType::Half:
-  case BuiltinType::Float16:
-    EltTy = llvm::Type::getHalfTy(VMContext);
-    break;
-  case BuiltinType::BFloat16:
-    EltTy = llvm::Type::getBFloatTy(VMContext);
-    break;
-  case BuiltinType::Float:
-    EltTy = llvm::Type::getFloatTy(VMContext);
-    break;
-  case BuiltinType::Double:
-    EltTy = llvm::Type::getDoubleTy(VMContext);
-    break;
-  default:
-    return nullptr;
-  }
-
+  llvm::Type *EltTy = CGT.ConvertType(FirstFloatTy);
   return llvm::FixedVectorType::get(EltTy, Count);
 }
 
@@ -285,7 +259,7 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
         // e.g., {half, half} -> <2 x half>, {float, float} -> <2 x float>
         if (RD) {
           if (llvm::Type *VecTy = getPackableHomogeneousFloatVectorType(
-                  RD, getContext(), getVMContext())) {
+                  RD, getContext(), CGT)) {
             return ABIArgInfo::getDirect(VecTy);
           }
         }
@@ -396,8 +370,8 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
       // First, try to pack uniform float structs into vectors
       // e.g., {half, half} -> <2 x half>, {float, float} -> <2 x float>
       if (RD) {
-        if (llvm::Type *VecTy = getPackableHomogeneousFloatVectorType(
-                RD, getContext(), getVMContext())) {
+        if (llvm::Type *VecTy =
+                getPackableHomogeneousFloatVectorType(RD, getContext(), CGT)) {
           unsigned NumRegs = (Size + 31) / 32;
           NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
           return ABIArgInfo::getDirect(VecTy);
@@ -462,7 +436,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
   unsigned getDeviceKernelCallingConv() const override;
 
   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
-      llvm::PointerType *T, QualType QT) const override;
+                                 llvm::PointerType *T,
+                                 QualType QT) const override;
 
   LangAS getASTAllocaAddressSpace() const override {
     return getLangASFromTargetAS(
@@ -617,9 +592,10 @@ unsigned AMDGPUTargetCodeGenInfo::getDeviceKernelCallingConv() const {
 // emitting null pointers in private and local address spaces, a null
 // pointer in generic address space is emitted which is casted to a
 // pointer in local or private address space.
-llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
-    const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
-    QualType QT) const {
+llvm::Constant *
+AMDGPUTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
+                                        llvm::PointerType *PT,
+                                        QualType QT) const {
   if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
     return llvm::ConstantPointerNull::get(PT);
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AMDGPU] Pack uniform float structs into vector types (PR #195417)

Reply via email to