llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Erich Keane (erichkeane) <details> <summary>Changes</summary> Our union layout calculations were quite a bit wrong, `getLargestMember` actually got the largest aligned type, which is not the same thing. This showed up in quite a few places. #<!-- -->198361 was JUST submitted, and seems to have fixed the SYMPTOM, but not the actual size calculation in a way that is robust in the future. This patch removes `getLargestMember` and renames it to do what it actually does: gets the storage type of the union. It also corrects the alignment and size calculations (and as a drive-by, fixes the calculation for the size of an empty union). This patch keeps the test changes from the previous fix, to make sure we get that correct. --- Full diff: https://github.com/llvm/llvm-project/pull/199292.diff 5 Files Affected: - (modified) clang/include/clang/CIR/Dialect/IR/CIRTypes.td (+1-6) - (modified) clang/lib/CIR/Dialect/IR/CIRTypes.cpp (+40-48) - (modified) clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp (+5-3) - (added) clang/test/CIR/CodeGen/union-layout.cpp (+50) - (modified) clang/unittests/CIR/UnionTypeSizeTest.cpp (+1-1) ``````````diff diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td index 9e639df13de70..64a1a542ec740 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -737,12 +737,7 @@ def CIR_RecordType : CIR_Type<"Record", "record", [ bool isComplete() const { return !isIncomplete(); }; bool isIncomplete() const; - mlir::Type getLargestMember(const mlir::DataLayout &dataLayout) const; - - /// Tail-padding member for a padded union (last member appended by - /// lowerUnion). Empty type when the record is not padded. - mlir::Type getPadding() const; - + mlir::Type getUnionStorageType(const mlir::DataLayout &dataLayout) const; size_t getNumElements() const { return getMembers().size(); }; std::string getKindAsStr() { switch (getKind()) { diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index 23c327e81831b..2f0a11028ea21 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -322,36 +322,26 @@ void RecordType::complete(ArrayRef<Type> members, bool packed, bool padded) { llvm_unreachable("failed to complete record"); } -/// Return the largest member of in the type. -/// -/// Recurses into union members never returning a union as the largest member. -Type RecordType::getLargestMember(const ::mlir::DataLayout &dataLayout) const { - assert(isUnion() && "Only call getLargestMember on unions"); +/// Return the 'storage' type of the union, that is, without padding, +/// the type that is used to convert to the 'storage' for LLVM-IR. +Type RecordType::getUnionStorageType( + const ::mlir::DataLayout &dataLayout) const { + assert(isUnion() && "Only call getUnionStorageType on unions"); llvm::ArrayRef<Type> members = getMembers(); - if (members.empty()) + if (members.empty() || (getPadded() && members.size() == 1)) return {}; // If the union is padded, we need to ignore the last member, // which is the padding. - auto endIt = getPadded() ? std::prev(members.end()) : members.end(); - if (endIt == members.begin()) - return {}; - return *std::max_element(members.begin(), endIt, [&](Type lhs, Type rhs) { - return dataLayout.getTypeABIAlignment(lhs) < - dataLayout.getTypeABIAlignment(rhs) || - (dataLayout.getTypeABIAlignment(lhs) == - dataLayout.getTypeABIAlignment(rhs) && - dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs)); - }); -} - -mlir::Type RecordType::getPadding() const { - if (!getPadded()) - return {}; - llvm::ArrayRef<mlir::Type> members = getMembers(); - if (members.empty()) - return {}; - return members.back(); + return *std::max_element( + members.begin(), std::prev(members.end(), getPadded()), + [&](Type lhs, Type rhs) { + return dataLayout.getTypeABIAlignment(lhs) < + dataLayout.getTypeABIAlignment(rhs) || + (dataLayout.getTypeABIAlignment(lhs) == + dataLayout.getTypeABIAlignment(rhs) && + dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs)); + }); } bool RecordType::isLayoutIdentical(const RecordType &other) { @@ -388,26 +378,18 @@ llvm::TypeSize RecordType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, mlir::DataLayoutEntryListRef params) const { if (isUnion()) { - mlir::Type largest = getLargestMember(dataLayout); - if (!largest) - return llvm::TypeSize::getFixed(0); - // `getLargestMember` returns the highest-aligned variant (which dictates - // the union's alignment), not necessarily the largest by size. When the - // union is `padded` -- i.e., its highest-aligned variant is strictly - // smaller than its layout size, as happens for any union containing both - // a small high-alignment scalar and a larger low-alignment array (e.g., - // `union { char[16]; size_t; }`) -- `lowerUnion` appended a trailing - // byte-array member to extend the highest-aligned variant up to the - // layout size, and `LowerToLLVM` mirrors this by emitting the union as - // `{largest, padding}`. Include that padding here so `getTypeSize` - // reports the same size `LowerToLLVM` produces; otherwise a parent - // record containing the union gets a spurious tail-padding member added - // by `insertPadding`, making `sizeof(parent)` and array GEPs off by the - // missing bytes. - llvm::TypeSize size = dataLayout.getTypeSizeInBits(largest); - if (mlir::Type tailPad = getPadding()) - size += dataLayout.getTypeSizeInBits(tailPad); - return size; + // The size of a union is the size of the largest member. + llvm::ArrayRef<Type> members = getMembers(); + if (members.empty() || (getPadded() && members.size() == 1)) + return llvm::TypeSize::getFixed(8); + + mlir::Type largestType = *std::max_element( + members.begin(), std::prev(members.end(), getPadded()), + [&](Type lhs, Type rhs) { + return dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs); + }); + + return dataLayout.getTypeSizeInBits(largestType); } auto recordSize = static_cast<uint64_t>(computeStructSize(dataLayout)); @@ -418,10 +400,20 @@ uint64_t RecordType::getABIAlignment(const ::mlir::DataLayout &dataLayout, ::mlir::DataLayoutEntryListRef params) const { if (isUnion()) { - mlir::Type largest = getLargestMember(dataLayout); - if (!largest) + // The alignment of a union is the alignment of the member with the largest + // alignment. + llvm::ArrayRef<Type> members = getMembers(); + if (members.empty() || (getPadded() && members.size() == 1)) return 1; - return dataLayout.getTypeABIAlignment(largest); + + mlir::Type largestAlignType = *std::max_element( + members.begin(), std::prev(members.end(), getPadded()), + [&](Type lhs, Type rhs) { + return dataLayout.getTypeABIAlignment(lhs) < + dataLayout.getTypeABIAlignment(rhs); + }); + + return dataLayout.getTypeABIAlignment(largestAlignType); } // Packed structures always have an ABI alignment of 1. diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index c4e98e299dfc1..c7fac702af219 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3297,13 +3297,15 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, for (mlir::Type ty : type.getMembers()) llvmMembers.push_back(convertTypeForMemory(converter, dataLayout, ty)); break; - // Unions are lowered as only the largest member. + // Unions are lowered as the 'storage' type. Which is the type with the + // largest alignment (or, if there are multiples, the largest type with the + // largest alignment). case cir::RecordType::Union: if (type.getMembers().empty()) break; - if (auto largestMember = type.getLargestMember(dataLayout)) + if (auto storageType = type.getUnionStorageType(dataLayout)) llvmMembers.push_back( - convertTypeForMemory(converter, dataLayout, largestMember)); + convertTypeForMemory(converter, dataLayout, storageType)); if (type.getPadded()) { auto last = *type.getMembers().rbegin(); llvmMembers.push_back( diff --git a/clang/test/CIR/CodeGen/union-layout.cpp b/clang/test/CIR/CodeGen/union-layout.cpp new file mode 100644 index 0000000000000..85cd8ee2803ce --- /dev/null +++ b/clang/test/CIR/CodeGen/union-layout.cpp @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +struct HasAnonUnion { + union { + int buf[4]; + long long cap; + }; +}; +// CIR-DAG: !rec_HasAnonUnion = !cir.record<struct "HasAnonUnion" {![[ANON_UNION:.*]]}> +// LLVM-DAG: %struct.HasAnonUnion = type { %[[ANON_UNION:.*]] } + +// CIR-DAG: ![[ANON_UNION]] = !cir.record<union {{.*}} padded {!cir.array<!s32i x 4>, !s64i, !cir.array<!u8i x 8>}> +// LLVM-DAG: %[[ANON_UNION]] = type { i64, [8 x i8] } + +struct ContainsHasAnonUnion { + HasAnonUnion hau; + int thing; +}; +// CIR-DAG: !rec_ContainsHasAnonUnion = !cir.record<struct "ContainsHasAnonUnion" {!rec_HasAnonUnion, !s32i}> +// LLVM-DAG: %struct.ContainsHasAnonUnion = type { %struct.HasAnonUnion, i32 } + +struct HasEmptyUnion { + union { + } u; +}; + +struct ContainsHasEmptyUnion { + HasEmptyUnion heu; + int thing; +}; +// CIR-DAG: !rec_ContainsHasEmptyUnion = !cir.record<struct "ContainsHasEmptyUnion" padded {!cir.array<!u8i x 4>, !s32i}> +// LLVM-DAG: %struct.ContainsHasEmptyUnion = type { [4 x i8], i32 } + + +void use() { + ContainsHasAnonUnion u; + ContainsHasEmptyUnion u2; +} +// CIR: cir.func {{.*}}@_Z3usev() +// CIR: cir.alloca !rec_ContainsHasAnonUnion, !cir.ptr<!rec_ContainsHasAnonUnion>, ["u"] {alignment = 8 : i64} +// CIR: cir.alloca !rec_ContainsHasEmptyUnion, !cir.ptr<!rec_ContainsHasEmptyUnion>, ["u2"] {alignment = 4 : i64} + +// LLVM-LABEL: define {{.*}}@_Z3usev() +// LLVM: alloca %struct.ContainsHasAnonUnion, {{.*}}align 8 +// LLVM: alloca %struct.ContainsHasEmptyUnion, {{.*}}align 4 diff --git a/clang/unittests/CIR/UnionTypeSizeTest.cpp b/clang/unittests/CIR/UnionTypeSizeTest.cpp index a5b3f42cc0e99..b4fb3144797c2 100644 --- a/clang/unittests/CIR/UnionTypeSizeTest.cpp +++ b/clang/unittests/CIR/UnionTypeSizeTest.cpp @@ -73,7 +73,7 @@ TEST_F(UnionTypeSizeTest, EmptyUnion) { mlir::DataLayout dl(module); llvm::TypeSize size = dl.getTypeSizeInBits(ty); - EXPECT_EQ(size.getFixedValue(), 0u); + EXPECT_EQ(size.getFixedValue(), 8u); module->erase(); } `````````` </details> https://github.com/llvm/llvm-project/pull/199292 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
