https://github.com/Andres-Salamanca updated https://github.com/llvm/llvm-project/pull/142041
>From d6e0ab92adad4110280ac86a2e5f33f5a690b65f Mon Sep 17 00:00:00 2001 From: Andres Salamanca <andrealebarbari...@gmail.com> Date: Thu, 29 May 2025 16:17:09 -0500 Subject: [PATCH 1/6] Add initial support for bitfields in structs and add tests --- clang/include/clang/CIR/MissingFeatures.h | 2 + clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 114 +++++++++++++ .../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 155 +++++++++++++++++- clang/test/CIR/CodeGen/bitfields.cpp | 79 +++++++++ 4 files changed, 344 insertions(+), 6 deletions(-) create mode 100644 clang/test/CIR/CodeGen/bitfields.cpp diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 45452c5929a3b..6e6adf261abfc 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -149,6 +149,8 @@ struct MissingFeatures { static bool cxxabiUseARMGuardVarABI() { return false; } static bool cxxabiAppleARM64CXXABI() { return false; } static bool cxxabiStructorImplicitParam() { return false; } + static bool isDiscreteBitFieldABI() { return false; } + static bool isBigEndian() { return false; } // Address class static bool addressOffset() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h index ac8832b8c9b24..f3c0e865ad537 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h @@ -14,6 +14,106 @@ namespace clang::CIRGen { +/// Record with information about how a bitfield should be accessed. This is +/// very similar to what LLVM codegen does, once CIR evolves it's possible we +/// can use a more higher level representation. +/// +/// Often we lay out a sequence of bitfields as a contiguous sequence of bits. +/// When the AST record layout does this, we represent it in CIR as a +/// `!cir.record` type, which directly reflects the structure's layout, +/// including bitfield packing and padding, using CIR types such as +/// `!cir.bool`, `!s8i`, `!u16i`. +/// +/// To access a particular bitfield in CIR, we use the operations +/// `cir.get_bitfield` (`GetBitfieldOp`) or `cir.set_bitfield` +/// (`SetBitfieldOp`). These operations rely on the `bitfield_info` +/// attribute, which provides detailed metadata required for access, +/// such as the size and offset of the bitfield, the type and size of +/// the underlying storage, and whether the value is signed. +/// The CIRGenRecordLayout also has a bitFields map which encodes which +/// byte-sequence this bitfield falls within. Let's assume the following C +/// struct: +/// +/// struct: +/// +/// struct S { +/// char a, b, c; +/// unsigned bits : 3; +/// unsigned more_bits : 4; +/// unsigned still_more_bits : 7; +/// }; +/// +/// This will end up as the following cir.record. The first array is the +/// bitfield, and the second is the padding out to a 4-byte alignment. +/// +/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u8i, !u8i, +/// !cir.array<!u8i x 3>}> +/// +/// When generating code to access more_bits, we'll generate something +/// essentially like this: +/// +/// #bfi_more_bits = #cir.bitfield_info<name = "more_bits", storage_type = +/// !u16i, size = 4, offset = 3, is_signed = false> +/// +/// cir.func @store_field() { +/// %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64} +/// %1 = cir.const #cir.int<2> : !s32i +/// %2 = cir.cast(integral, %1 : !s32i), !u32i +/// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> -> +/// !cir.ptr<!u16i> +/// %4 = cir.set_bitfield(#bfi_more_bits, %3 : +/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i cir.return +/// } +//// +struct CIRGenBitFieldInfo { + /// The offset within a contiguous run of bitfields that are represented as + /// a single "field" within the cir.record type. This offset is in bits. + unsigned offset : 16; + + /// The total size of the bit-field, in bits. + unsigned size : 15; + + /// Whether the bit-field is signed. + unsigned isSigned : 1; + + /// The storage size in bits which should be used when accessing this + /// bitfield. + unsigned storageSize; + + /// The offset of the bitfield storage from the start of the record. + clang::CharUnits storageOffset; + + /// The offset within a contiguous run of bitfields that are represented as a + /// single "field" within the cir.record type, taking into account the AAPCS + /// rules for volatile bitfields. This offset is in bits. + unsigned volatileOffset : 16; + + /// The storage size in bits which should be used when accessing this + /// bitfield. + unsigned volatileStorageSize; + + /// The offset of the bitfield storage from the start of the record. + clang::CharUnits volatileStorageOffset; + + /// The name of a bitfield + llvm::StringRef name; + + // The actual storage type for the bitfield + mlir::Type storageType; + + CIRGenBitFieldInfo() + : offset(), size(), isSigned(), storageSize(), volatileOffset(), + volatileStorageSize() {} + + CIRGenBitFieldInfo(unsigned offset, unsigned size, bool isSigned, + unsigned storageSize, clang::CharUnits storageOffset) + : offset(offset), size(size), isSigned(isSigned), + storageSize(storageSize), storageOffset(storageOffset) {} + + void print(llvm::raw_ostream &os) const; + void dump() const; +}; + /// This class handles record and union layout info while lowering AST types /// to CIR types. /// @@ -41,6 +141,10 @@ class CIRGenRecordLayout { // for both virtual and non-virtual bases. llvm::DenseMap<const clang::CXXRecordDecl *, unsigned> nonVirtualBases; + /// Map from (bit-field) record field to the corresponding CIR record type + /// field no. This info is populated by record builder. + llvm::DenseMap<const clang::FieldDecl *, CIRGenBitFieldInfo> bitFields; + /// False if any direct or indirect subobject of this class, when considered /// as a complete object, requires a non-zero bitpattern when /// zero-initialized. @@ -83,6 +187,16 @@ class CIRGenRecordLayout { /// Check whether this struct can be C++ zero-initialized /// with a zeroinitializer when considered as a base subobject. bool isZeroInitializableAsBase() const { return zeroInitializableAsBase; } + + /// Return the BitFieldInfo that corresponds to the field FD. + const CIRGenBitFieldInfo &getBitFieldInfo(const clang::FieldDecl *fd) const { + fd = fd->getCanonicalDecl(); + assert(fd->isBitField() && "Invalid call for non-bit-field decl!"); + llvm::DenseMap<const clang::FieldDecl *, CIRGenBitFieldInfo>::const_iterator + it = bitFields.find(fd); + assert(it != bitFields.end() && "Unable to find bitfield info"); + return it->second; + } }; } // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp index 0aeef7fd89aef..f8e5fd2fc506d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp @@ -20,6 +20,7 @@ #include "clang/AST/RecordLayout.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/MissingFeatures.h" #include "llvm/Support/Casting.h" #include <memory> @@ -66,6 +67,10 @@ struct CIRRecordLowering final { return MemberInfo(offset, MemberInfo::InfoKind::Field, data); } + // Layout routines. + void setBitFieldInfo(const FieldDecl *fd, CharUnits startOffset, + mlir::Type storageType); + void lower(); void lowerUnion(); @@ -77,6 +82,8 @@ struct CIRRecordLowering final { void accumulateBases(const CXXRecordDecl *cxxRecordDecl); void accumulateVPtrs(); void accumulateFields(); + void accumulateBitFields(RecordDecl::field_iterator field, + RecordDecl::field_iterator fieldEnd); CharUnits bitsToCharUnits(uint64_t bitOffset) { return astContext.toCharUnitsFromBits(bitOffset); @@ -87,6 +94,9 @@ struct CIRRecordLowering final { CharUnits getSize(mlir::Type Ty) { return CharUnits::fromQuantity(dataLayout.layout.getTypeSize(Ty)); } + CharUnits getSizeInBits(mlir::Type ty) { + return CharUnits::fromQuantity(dataLayout.layout.getTypeSizeInBits(ty)); + } CharUnits getAlignment(mlir::Type Ty) { return CharUnits::fromQuantity(dataLayout.layout.getTypeABIAlignment(Ty)); } @@ -124,6 +134,17 @@ struct CIRRecordLowering final { mlir::Type getStorageType(const CXXRecordDecl *RD) { return cirGenTypes.getCIRGenRecordLayout(RD).getBaseSubobjectCIRType(); } + // This is different from LLVM traditional codegen because CIRGen uses arrays + // of bytes instead of arbitrary-sized integers. This is important for packed + // structures support. + mlir::Type getBitfieldStorageType(unsigned numBits) { + unsigned alignedBits = llvm::alignTo(numBits, astContext.getCharWidth()); + if (cir::isValidFundamentalIntWidth(alignedBits)) + return builder.getUIntNTy(alignedBits); + + mlir::Type type = getCharType(); + return cir::ArrayType::get(type, alignedBits / astContext.getCharWidth()); + } mlir::Type getStorageType(const FieldDecl *fieldDecl) { mlir::Type type = cirGenTypes.convertTypeForMem(fieldDecl->getType()); @@ -157,6 +178,7 @@ struct CIRRecordLowering final { std::vector<MemberInfo> members; // Output fields, consumed by CIRGenTypes::computeRecordLayout llvm::SmallVector<mlir::Type, 16> fieldTypes; + llvm::DenseMap<const FieldDecl *, CIRGenBitFieldInfo> bitFields; llvm::DenseMap<const FieldDecl *, unsigned> fieldIdxMap; llvm::DenseMap<const CXXRecordDecl *, unsigned> nonVirtualBases; cir::CIRDataLayout dataLayout; @@ -186,6 +208,32 @@ CIRRecordLowering::CIRRecordLowering(CIRGenTypes &cirGenTypes, zeroInitializable(true), zeroInitializableAsBase(true), packed(packed), padded(false) {} +void CIRRecordLowering::setBitFieldInfo(const FieldDecl *fd, + CharUnits startOffset, + mlir::Type storageType) { + CIRGenBitFieldInfo &info = bitFields[fd->getCanonicalDecl()]; + info.isSigned = fd->getType()->isSignedIntegerOrEnumerationType(); + info.offset = + (unsigned)(getFieldBitOffset(fd) - astContext.toBits(startOffset)); + info.size = fd->getBitWidthValue(); + info.storageSize = getSizeInBits(storageType).getQuantity(); + info.storageOffset = startOffset; + info.storageType = storageType; + info.name = fd->getName(); + + if (info.size > info.storageSize) + info.size = info.storageSize; + // Reverse the bit offsets for big endian machines. Because we represent + // a bitfield as a single large integer load, we can imagine the bits + // counting from the most-significant-bit instead the + // least-significant-bit. + assert(!cir::MissingFeatures::isBigEndian()); + + info.volatileStorageSize = 0; + info.volatileOffset = 0; + info.volatileStorageOffset = CharUnits::Zero(); +} + void CIRRecordLowering::lower() { if (recordDecl->isUnion()) { lowerUnion(); @@ -233,6 +281,8 @@ void CIRRecordLowering::fillOutputFields() { fieldTypes.size() - 1; // A field without storage must be a bitfield. assert(!cir::MissingFeatures::bitfields()); + if (!member.data) + setBitFieldInfo(member.fieldDecl, member.offset, fieldTypes.back()); } else if (member.kind == MemberInfo::InfoKind::Base) { nonVirtualBases[member.cxxRecordDecl] = fieldTypes.size() - 1; } @@ -240,16 +290,107 @@ void CIRRecordLowering::fillOutputFields() { } } +void CIRRecordLowering::accumulateBitFields( + RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd) { + // Run stores the first element of the current run of bitfields. FieldEnd is + // used as a special value to note that we don't have a current run. A + // bitfield run is a contiguous collection of bitfields that can be stored in + // the same storage block. Zero-sized bitfields and bitfields that would + // cross an alignment boundary break a run and start a new one. + RecordDecl::field_iterator run = fieldEnd; + // Tail is the offset of the first bit off the end of the current run. It's + // used to determine if the ASTRecordLayout is treating these two bitfields as + // contiguous. StartBitOffset is offset of the beginning of the Run. + uint64_t startBitOffset, tail = 0; + assert(!cir::MissingFeatures::isDiscreteBitFieldABI()); + + // Check if OffsetInRecord (the size in bits of the current run) is better + // as a single field run. When OffsetInRecord has legal integer width, and + // its bitfield offset is naturally aligned, it is better to make the + // bitfield a separate storage component so as it can be accessed directly + // with lower cost. + auto isBetterAsSingleFieldRun = [&](uint64_t offsetInRecord, + uint64_t startBitOffset, + uint64_t nextTail = 0) { + if (!cirGenTypes.getCGModule().getCodeGenOpts().FineGrainedBitfieldAccesses) + return false; + cirGenTypes.getCGModule().errorNYI(field->getSourceRange(), + "NYI FineGrainedBitfield"); + return true; + }; + + // The start field is better as a single field run. + bool startFieldAsSingleRun = false; + for (;;) { + // Check to see if we need to start a new run. + if (run == fieldEnd) { + // If we're out of fields, return. + if (field == fieldEnd) + break; + // Any non-zero-length bitfield can start a new run. + if (!field->isZeroLengthBitField()) { + run = field; + startBitOffset = getFieldBitOffset(*field); + tail = startBitOffset + field->getBitWidthValue(); + startFieldAsSingleRun = + isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset); + } + ++field; + continue; + } + + // If the start field of a new run is better as a single run, or if current + // field (or consecutive fields) is better as a single run, or if current + // field has zero width bitfield and either UseZeroLengthBitfieldAlignment + // or UseBitFieldTypeAlignment is set to true, or if the offset of current + // field is inconsistent with the offset of previous field plus its offset, + // skip the block below and go ahead to emit the storage. Otherwise, try to + // add bitfields to the run. + uint64_t nextTail = tail; + if (field != fieldEnd) + nextTail += field->getBitWidthValue(); + + if (!startFieldAsSingleRun && field != fieldEnd && + !isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset, + nextTail) && + (!field->isZeroLengthBitField() || + (!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() && + !astContext.getTargetInfo().useBitFieldTypeAlignment())) && + tail == getFieldBitOffset(*field)) { + tail = nextTail; + ++field; + continue; + } + + // We've hit a break-point in the run and need to emit a storage field. + mlir::Type type = getBitfieldStorageType(tail - startBitOffset); + + // Add the storage member to the record and set the bitfield info for all of + // the bitfields in the run. Bitfields get the offset of their storage but + // come afterward and remain there after a stable sort. + members.push_back(makeStorageInfo(bitsToCharUnits(startBitOffset), type)); + for (; run != field; ++run) + members.push_back(MemberInfo(bitsToCharUnits(startBitOffset), + MemberInfo::InfoKind::Field, nullptr, *run)); + run = fieldEnd; + startFieldAsSingleRun = false; + } +} + void CIRRecordLowering::accumulateFields() { - for (const FieldDecl *field : recordDecl->fields()) { + for (RecordDecl::field_iterator field = recordDecl->field_begin(), + fieldEnd = recordDecl->field_end(); + field != fieldEnd;) { if (field->isBitField()) { - cirGenTypes.getCGModule().errorNYI(recordDecl->getSourceRange(), - "accumulate bitfields"); - ++field; + RecordDecl::field_iterator start = field; + // Iterate to gather the list of bitfields. + for (++field; field != fieldEnd && field->isBitField(); ++field) + ; + accumulateBitFields(start, field); } else if (!field->isZeroSize(astContext)) { - members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(field)), + members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(*field)), MemberInfo::InfoKind::Field, - getStorageType(field), field)); + getStorageType(*field), *field)); ++field; } else { // TODO(cir): do we want to do anything special about zero size members? @@ -383,6 +524,8 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *rd, cir::RecordType *ty) { // Add all the field numbers. rl->fieldIdxMap.swap(lowering.fieldIdxMap); + rl->bitFields.swap(lowering.bitFields); + // Dump the layout, if requested. if (getASTContext().getLangOpts().DumpRecordLayouts) { cgm.errorNYI(rd->getSourceRange(), "computeRecordLayout: dump layout"); diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp new file mode 100644 index 0000000000000..2eedbcd4377e6 --- /dev/null +++ b/clang/test/CIR/CodeGen/bitfields.cpp @@ -0,0 +1,79 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +struct A { + char a, b, c; + unsigned bits : 3; + unsigned more_bits : 4; + unsigned still_more_bits : 7; +}; + +typedef struct { + int a : 4; + int b : 5; + int c; +} D; + +typedef struct { + int a : 4; + int b : 27; + int c : 17; + int d : 2; + int e : 15; + unsigned f; // type other than int above, not a bitfield +} S; + +typedef struct { + int a : 3; // one bitfield with size < 8 + unsigned b; +} T; + +typedef struct { + char a; + char b; + char c; + + // startOffset 24 bits, new storage from here + int d: 2; + int e: 2; + int f: 4; + int g: 25; + int h: 3; + int i: 4; + int j: 3; + int k: 8; + + int l: 14; // need to be a part of the new storage + // because (tail - startOffset) is 65 after 'l' field +} U; + +// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}> +// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}> +// CIR-DAG: !rec_U = !cir.record<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}> +// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}> +// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}> + + +// LLVM-DAG: %struct.D = type { i16, i32 } +// LLVM-DAG: %struct.U = type { i8, i8, i8, [9 x i8] } +// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] } +// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 } +// LLVM-DAG: %struct.T = type { i8, i32 } + +// OGCG-DAG: %struct.D = type { i16, i32 } +// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }> +// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }> +// OGCG-DAG: %struct.S = type { i64, i16, i32 } +// OGCG-DAG: %struct.T = type { i8, i32 } + +void def() { + A a; + D d; + S s; + T t; + U u; +} >From e921f99fdc501f7156d6a56d134ac440c522da6b Mon Sep 17 00:00:00 2001 From: Andres Salamanca <andrealebarbari...@gmail.com> Date: Thu, 29 May 2025 16:31:14 -0500 Subject: [PATCH 2/6] Fix incorrect formatting --- clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h index f3c0e865ad537..6659706d2bcb2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h @@ -62,7 +62,8 @@ namespace clang::CIRGen { /// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> -> /// !cir.ptr<!u16i> /// %4 = cir.set_bitfield(#bfi_more_bits, %3 : -/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i cir.return +/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i +/// cir.return /// } //// struct CIRGenBitFieldInfo { >From e3d2843a0b9cf0d590c796df6ee06cdb6f34c9ff Mon Sep 17 00:00:00 2001 From: Andres Salamanca <andrealebarbari...@gmail.com> Date: Thu, 29 May 2025 16:33:58 -0500 Subject: [PATCH 3/6] Fix formatting --- clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h index 6659706d2bcb2..136772037ccd4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h @@ -34,8 +34,6 @@ namespace clang::CIRGen { /// byte-sequence this bitfield falls within. Let's assume the following C /// struct: /// -/// struct: -/// /// struct S { /// char a, b, c; /// unsigned bits : 3; @@ -65,7 +63,7 @@ namespace clang::CIRGen { /// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i /// cir.return /// } -//// +/// struct CIRGenBitFieldInfo { /// The offset within a contiguous run of bitfields that are represented as /// a single "field" within the cir.record type. This offset is in bits. >From 215b3e0315a8a8c2d7c0f41cf766dcfd0984c4a6 Mon Sep 17 00:00:00 2001 From: Andres Salamanca <andrealebarbari...@gmail.com> Date: Tue, 3 Jun 2025 18:08:37 -0500 Subject: [PATCH 4/6] Applied code review changes --- clang/include/clang/CIR/MissingFeatures.h | 1 + clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 15 ++--- .../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 60 +++++++++---------- clang/test/CIR/CodeGen/bitfields.cpp | 31 +++++----- 4 files changed, 53 insertions(+), 54 deletions(-) diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 6e6adf261abfc..e0b2959f374f8 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -241,6 +241,7 @@ struct MissingFeatures { static bool builtinCall() { return false; } static bool builtinCallF128() { return false; } static bool builtinCallMathErrno() { return false; } + static bool nonFineGrainedBitfields() { return false; } // Missing types static bool dataMemberType() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h index 136772037ccd4..a852a1edac139 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h @@ -41,8 +41,9 @@ namespace clang::CIRGen { /// unsigned still_more_bits : 7; /// }; /// -/// This will end up as the following cir.record. The first array is the -/// bitfield, and the second is the padding out to a 4-byte alignment. +/// This will end up as the following cir.record. The bitfield members are +/// represented by two !u8i values, and the array provides padding to align the +/// struct to a 4-byte alignment. /// /// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u8i, !u8i, /// !cir.array<!u8i x 3>}> @@ -51,16 +52,16 @@ namespace clang::CIRGen { /// essentially like this: /// /// #bfi_more_bits = #cir.bitfield_info<name = "more_bits", storage_type = -/// !u16i, size = 4, offset = 3, is_signed = false> +/// !u8i, size = 4, offset = 3, is_signed = false> /// /// cir.func @store_field() { /// %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64} /// %1 = cir.const #cir.int<2> : !s32i /// %2 = cir.cast(integral, %1 : !s32i), !u32i -/// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> -> -/// !cir.ptr<!u16i> +/// %3 = cir.get_member %0[3] {name = "more_bits"} : !cir.ptr<!rec_S> -> +/// !cir.ptr<!u8i> /// %4 = cir.set_bitfield(#bfi_more_bits, %3 : -/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i +/// !cir.ptr<!u8i>, %2 : !u32i) -> !u32i /// cir.return /// } /// @@ -110,7 +111,7 @@ struct CIRGenBitFieldInfo { storageSize(storageSize), storageOffset(storageOffset) {} void print(llvm::raw_ostream &os) const; - void dump() const; + LLVM_DUMP_METHOD void dump() const; }; /// This class handles record and union layout info while lowering AST types diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp index f8e5fd2fc506d..7f12448e2e525 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp @@ -223,9 +223,9 @@ void CIRRecordLowering::setBitFieldInfo(const FieldDecl *fd, if (info.size > info.storageSize) info.size = info.storageSize; - // Reverse the bit offsets for big endian machines. Because we represent - // a bitfield as a single large integer load, we can imagine the bits - // counting from the most-significant-bit instead the + // Reverse the bit offsets for big endian machines. Since bitfields are laid + // out as packed bits within an integer-sized unit, we can imagine the bits + // counting from the most-significant-bit instead of the // least-significant-bit. assert(!cir::MissingFeatures::isBigEndian()); @@ -292,35 +292,25 @@ void CIRRecordLowering::fillOutputFields() { void CIRRecordLowering::accumulateBitFields( RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd) { - // Run stores the first element of the current run of bitfields. FieldEnd is - // used as a special value to note that we don't have a current run. A + // 'run' stores the first element of the current run of bitfields. 'fieldEnd' + // is used as a special value to note that we don't have a current run. A // bitfield run is a contiguous collection of bitfields that can be stored in // the same storage block. Zero-sized bitfields and bitfields that would // cross an alignment boundary break a run and start a new one. RecordDecl::field_iterator run = fieldEnd; - // Tail is the offset of the first bit off the end of the current run. It's + // 'tail' is the offset of the first bit off the end of the current run. It's // used to determine if the ASTRecordLayout is treating these two bitfields as - // contiguous. StartBitOffset is offset of the beginning of the Run. + // contiguous. 'startBitOffset' is offset of the beginning of the run. uint64_t startBitOffset, tail = 0; assert(!cir::MissingFeatures::isDiscreteBitFieldABI()); - // Check if OffsetInRecord (the size in bits of the current run) is better + // Check if 'offsetInRecord' (the size in bits of the current run) is better // as a single field run. When OffsetInRecord has legal integer width, and // its bitfield offset is naturally aligned, it is better to make the // bitfield a separate storage component so as it can be accessed directly // with lower cost. - auto isBetterAsSingleFieldRun = [&](uint64_t offsetInRecord, - uint64_t startBitOffset, - uint64_t nextTail = 0) { - if (!cirGenTypes.getCGModule().getCodeGenOpts().FineGrainedBitfieldAccesses) - return false; - cirGenTypes.getCGModule().errorNYI(field->getSourceRange(), - "NYI FineGrainedBitfield"); - return true; - }; + assert(!cir::MissingFeatures::nonFineGrainedBitfields()); - // The start field is better as a single field run. - bool startFieldAsSingleRun = false; for (;;) { // Check to see if we need to start a new run. if (run == fieldEnd) { @@ -332,27 +322,34 @@ void CIRRecordLowering::accumulateBitFields( run = field; startBitOffset = getFieldBitOffset(*field); tail = startBitOffset + field->getBitWidthValue(); - startFieldAsSingleRun = - isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset); + assert(!cir::MissingFeatures::nonFineGrainedBitfields()); } ++field; continue; } - // If the start field of a new run is better as a single run, or if current - // field (or consecutive fields) is better as a single run, or if current - // field has zero width bitfield and either UseZeroLengthBitfieldAlignment - // or UseBitFieldTypeAlignment is set to true, or if the offset of current - // field is inconsistent with the offset of previous field plus its offset, - // skip the block below and go ahead to emit the storage. Otherwise, try to - // add bitfields to the run. + // Decide whether to continue extending the current bitfield run. + // + // Skip the block below and go directly to emitting storage if any of the + // following is true: + // - 1. The first field in the run is better treated as its own run. + // - 2. We have reached the end of the fields. + // - 3. The current field (or set of fields) is better as its own run. + // - 4. The current field is a zero-width bitfield or: + // - Zero-length bitfield alignment is enabled, and + // - Bitfield type alignment is enabled. + // - 5. The current field's offset doesn't match the expected tail (i.e., + // layout isn't contiguous). + // + // If none of the above conditions are met, add the current field to the + // current run. uint64_t nextTail = tail; if (field != fieldEnd) nextTail += field->getBitWidthValue(); - if (!startFieldAsSingleRun && field != fieldEnd && - !isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset, - nextTail) && + // TODO: add condition 1 and 3 + assert(!cir::MissingFeatures::nonFineGrainedBitfields()); + if (field != fieldEnd && (!field->isZeroLengthBitField() || (!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() && !astContext.getTargetInfo().useBitFieldTypeAlignment())) && @@ -373,7 +370,6 @@ void CIRRecordLowering::accumulateBitFields( members.push_back(MemberInfo(bitsToCharUnits(startBitOffset), MemberInfo::InfoKind::Field, nullptr, *run)); run = fieldEnd; - startFieldAsSingleRun = false; } } diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp index 2eedbcd4377e6..3bd48ac4e09e8 100644 --- a/clang/test/CIR/CodeGen/bitfields.cpp +++ b/clang/test/CIR/CodeGen/bitfields.cpp @@ -12,12 +12,20 @@ struct A { unsigned still_more_bits : 7; }; +// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}> +// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] } +// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }> + typedef struct { int a : 4; int b : 5; int c; } D; +// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}> +// LLVM-DAG: %struct.D = type { i16, i32 } +// OGCG-DAG: %struct.D = type { i16, i32 } + typedef struct { int a : 4; int b : 27; @@ -27,11 +35,19 @@ typedef struct { unsigned f; // type other than int above, not a bitfield } S; +// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}> +// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 } +// OGCG-DAG: %struct.S = type { i64, i16, i32 } + typedef struct { int a : 3; // one bitfield with size < 8 unsigned b; } T; +// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}> +// LLVM-DAG: %struct.T = type { i8, i32 } +// OGCG-DAG: %struct.T = type { i8, i32 } + typedef struct { char a; char b; @@ -51,24 +67,9 @@ typedef struct { // because (tail - startOffset) is 65 after 'l' field } U; -// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}> -// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}> // CIR-DAG: !rec_U = !cir.record<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}> -// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}> -// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}> - - -// LLVM-DAG: %struct.D = type { i16, i32 } // LLVM-DAG: %struct.U = type { i8, i8, i8, [9 x i8] } -// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] } -// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 } -// LLVM-DAG: %struct.T = type { i8, i32 } - -// OGCG-DAG: %struct.D = type { i16, i32 } // OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }> -// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }> -// OGCG-DAG: %struct.S = type { i64, i16, i32 } -// OGCG-DAG: %struct.T = type { i8, i32 } void def() { A a; >From 87d1c38f71ae87472dfb8125303ba2f50ab4e98b Mon Sep 17 00:00:00 2001 From: Andres Salamanca <andrealebarbari...@gmail.com> Date: Wed, 18 Jun 2025 15:24:43 -0500 Subject: [PATCH 5/6] Update accumulated bit fields algorithm --- clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 10 +- .../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 261 +++++++++++++----- clang/lib/CIR/CodeGen/TargetInfo.cpp | 37 +++ clang/lib/CIR/CodeGen/TargetInfo.h | 10 + clang/test/CIR/CodeGen/bitfields.c | 79 ++++++ clang/test/CIR/CodeGen/bitfields.cpp | 52 +--- 6 files changed, 321 insertions(+), 128 deletions(-) create mode 100644 clang/test/CIR/CodeGen/bitfields.c diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h index a852a1edac139..3b51ab784d374 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h @@ -42,26 +42,26 @@ namespace clang::CIRGen { /// }; /// /// This will end up as the following cir.record. The bitfield members are -/// represented by two !u8i values, and the array provides padding to align the +/// represented by one !u16i value, and the array provides padding to align the /// struct to a 4-byte alignment. /// -/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u8i, !u8i, +/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u16i, /// !cir.array<!u8i x 3>}> /// /// When generating code to access more_bits, we'll generate something /// essentially like this: /// /// #bfi_more_bits = #cir.bitfield_info<name = "more_bits", storage_type = -/// !u8i, size = 4, offset = 3, is_signed = false> +/// !u16i, size = 4, offset = 3, is_signed = false> /// /// cir.func @store_field() { /// %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64} /// %1 = cir.const #cir.int<2> : !s32i /// %2 = cir.cast(integral, %1 : !s32i), !u32i /// %3 = cir.get_member %0[3] {name = "more_bits"} : !cir.ptr<!rec_S> -> -/// !cir.ptr<!u8i> +/// !cir.ptr<!u16i> /// %4 = cir.set_bitfield(#bfi_more_bits, %3 : -/// !cir.ptr<!u8i>, %2 : !u32i) -> !u32i +/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i /// cir.return /// } /// diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp index 7f12448e2e525..8dbf1b36a93b2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp @@ -82,8 +82,9 @@ struct CIRRecordLowering final { void accumulateBases(const CXXRecordDecl *cxxRecordDecl); void accumulateVPtrs(); void accumulateFields(); - void accumulateBitFields(RecordDecl::field_iterator field, - RecordDecl::field_iterator fieldEnd); + RecordDecl::field_iterator + accumulateBitFields(RecordDecl::field_iterator field, + RecordDecl::field_iterator fieldEnd); CharUnits bitsToCharUnits(uint64_t bitOffset) { return astContext.toCharUnitsFromBits(bitOffset); @@ -290,87 +291,199 @@ void CIRRecordLowering::fillOutputFields() { } } -void CIRRecordLowering::accumulateBitFields( - RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd) { - // 'run' stores the first element of the current run of bitfields. 'fieldEnd' - // is used as a special value to note that we don't have a current run. A - // bitfield run is a contiguous collection of bitfields that can be stored in - // the same storage block. Zero-sized bitfields and bitfields that would - // cross an alignment boundary break a run and start a new one. - RecordDecl::field_iterator run = fieldEnd; - // 'tail' is the offset of the first bit off the end of the current run. It's - // used to determine if the ASTRecordLayout is treating these two bitfields as - // contiguous. 'startBitOffset' is offset of the beginning of the run. - uint64_t startBitOffset, tail = 0; +RecordDecl::field_iterator +CIRRecordLowering::accumulateBitFields(RecordDecl::field_iterator field, + RecordDecl::field_iterator fieldEnd) { assert(!cir::MissingFeatures::isDiscreteBitFieldABI()); - // Check if 'offsetInRecord' (the size in bits of the current run) is better - // as a single field run. When OffsetInRecord has legal integer width, and - // its bitfield offset is naturally aligned, it is better to make the - // bitfield a separate storage component so as it can be accessed directly - // with lower cost. - assert(!cir::MissingFeatures::nonFineGrainedBitfields()); + CharUnits regSize = + bitsToCharUnits(astContext.getTargetInfo().getRegisterWidth()); + unsigned charBits = astContext.getCharWidth(); + + // Data about the start of the span we're accumulating to create an access + // unit from. 'Begin' is the first bitfield of the span. If 'begin' is + // 'fieldEnd', we've not got a current span. The span starts at the + // 'beginOffset' character boundary. 'bitSizeSinceBegin' is the size (in bits) + // of the span -- this might include padding when we've advanced to a + // subsequent bitfield run. + RecordDecl::field_iterator begin = fieldEnd; + CharUnits beginOffset; + uint64_t bitSizeSinceBegin; + + // The (non-inclusive) end of the largest acceptable access unit we've found + // since 'begin'. If this is 'begin', we're gathering the initial set of + // bitfields of a new span. 'bestEndOffset' is the end of that acceptable + // access unit -- it might extend beyond the last character of the bitfield + // run, using available padding characters. + RecordDecl::field_iterator bestEnd = begin; + CharUnits bestEndOffset; + bool bestClipped; // Whether the representation must be in a byte array. for (;;) { - // Check to see if we need to start a new run. - if (run == fieldEnd) { - // If we're out of fields, return. - if (field == fieldEnd) + // atAlignedBoundary is true if 'field' is the (potential) start of a new + // span (or the end of the bitfields). When true, limitOffset is the + // character offset of that span and barrier indicates whether the new + // span cannot be merged into the current one. + bool atAlignedBoundary = false; + bool barrier = false; // a barrier can be a zero Bit Width or non bit member + if (field != fieldEnd && field->isBitField()) { + uint64_t bitOffset = getFieldBitOffset(*field); + if (begin == fieldEnd) { + // Beginning a new span. + begin = field; + bestEnd = begin; + + assert((bitOffset % charBits) == 0 && "Not at start of char"); + beginOffset = bitsToCharUnits(bitOffset); + bitSizeSinceBegin = 0; + } else if ((bitOffset % charBits) != 0) { + // Bitfield occupies the same character as previous bitfield, it must be + // part of the same span. This can include zero-length bitfields, should + // the target not align them to character boundaries. Such non-alignment + // is at variance with the standards, which require zero-length + // bitfields be a barrier between access units. But of course we can't + // achieve that in the middle of a character. + assert(bitOffset == + astContext.toBits(beginOffset) + bitSizeSinceBegin && + "Concatenating non-contiguous bitfields"); + } else { + // Bitfield potentially begins a new span. This includes zero-length + // bitfields on non-aligning targets that lie at character boundaries + // (those are barriers to merging). + if (field->isZeroLengthBitField()) + barrier = true; + atAlignedBoundary = true; + } + } else { + // We've reached the end of the bitfield run. Either we're done, or this + // is a barrier for the current span. + if (begin == fieldEnd) break; - // Any non-zero-length bitfield can start a new run. - if (!field->isZeroLengthBitField()) { - run = field; - startBitOffset = getFieldBitOffset(*field); - tail = startBitOffset + field->getBitWidthValue(); - assert(!cir::MissingFeatures::nonFineGrainedBitfields()); + + barrier = true; + atAlignedBoundary = true; + } + + // 'installBest' indicates whether we should create an access unit for the + // current best span: fields ['begin', 'bestEnd') occupying characters + // ['beginOffset', 'bestEndOffset'). + bool installBest = false; + if (atAlignedBoundary) { + // 'field' is the start of a new span or the end of the bitfields. The + // just-seen span now extends to 'bitSizeSinceBegin'. + + // Determine if we can accumulate that just-seen span into the current + // accumulation. + CharUnits accessSize = bitsToCharUnits(bitSizeSinceBegin + charBits - 1); + if (bestEnd == begin) { + // This is the initial run at the start of a new span. By definition, + // this is the best seen so far. + bestEnd = field; + bestEndOffset = beginOffset + accessSize; + // Assume clipped until proven not below. + bestClipped = true; + if (!bitSizeSinceBegin) + // A zero-sized initial span -- this will install nothing and reset + // for another. + installBest = true; + } else if (accessSize > regSize) { + // Accumulating the just-seen span would create a multi-register access + // unit, which would increase register pressure. + installBest = true; + } + + if (!installBest) { + // Determine if accumulating the just-seen span will create an expensive + // access unit or not. + mlir::Type type = getUIntNType(astContext.toBits(accessSize)); + if (!astContext.getTargetInfo().hasCheapUnalignedBitFieldAccess()) + cirGenTypes.getCGModule().errorNYI( + field->getSourceRange(), "NYI CheapUnalignedBitFieldAccess"); + + if (!installBest) { + // Find the next used storage offset to determine what the limit of + // the current span is. That's either the offset of the next field + // with storage (which might be field itself) or the end of the + // non-reusable tail padding. + CharUnits limitOffset; + for (auto probe = field; probe != fieldEnd; ++probe) + if (!isEmptyFieldForLayout(astContext, *probe)) { + // A member with storage sets the limit. + assert((getFieldBitOffset(*probe) % charBits) == 0 && + "Next storage is not byte-aligned"); + limitOffset = bitsToCharUnits(getFieldBitOffset(*probe)); + goto FoundLimit; + } + assert(!cir::MissingFeatures::cxxSupport()); + limitOffset = astRecordLayout.getDataSize(); + FoundLimit: + CharUnits typeSize = getSize(type); + if (beginOffset + typeSize <= limitOffset) { + // There is space before limitOffset to create a naturally-sized + // access unit. + bestEndOffset = beginOffset + typeSize; + bestEnd = field; + bestClipped = false; + } + if (barrier) { + // The next field is a barrier that we cannot merge across. + installBest = true; + } else if (cirGenTypes.getCGModule() + .getCodeGenOpts() + .FineGrainedBitfieldAccesses) { + assert(!cir::MissingFeatures::nonFineGrainedBitfields()); + cirGenTypes.getCGModule().errorNYI(field->getSourceRange(), + "NYI FineGrainedBitfield"); + } else { + // Otherwise, we're not installing. Update the bit size + // of the current span to go all the way to limitOffset, which is + // the (aligned) offset of next bitfield to consider. + bitSizeSinceBegin = astContext.toBits(limitOffset - beginOffset); + } + } } - ++field; - continue; } - // Decide whether to continue extending the current bitfield run. - // - // Skip the block below and go directly to emitting storage if any of the - // following is true: - // - 1. The first field in the run is better treated as its own run. - // - 2. We have reached the end of the fields. - // - 3. The current field (or set of fields) is better as its own run. - // - 4. The current field is a zero-width bitfield or: - // - Zero-length bitfield alignment is enabled, and - // - Bitfield type alignment is enabled. - // - 5. The current field's offset doesn't match the expected tail (i.e., - // layout isn't contiguous). - // - // If none of the above conditions are met, add the current field to the - // current run. - uint64_t nextTail = tail; - if (field != fieldEnd) - nextTail += field->getBitWidthValue(); - - // TODO: add condition 1 and 3 - assert(!cir::MissingFeatures::nonFineGrainedBitfields()); - if (field != fieldEnd && - (!field->isZeroLengthBitField() || - (!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() && - !astContext.getTargetInfo().useBitFieldTypeAlignment())) && - tail == getFieldBitOffset(*field)) { - tail = nextTail; + if (installBest) { + assert((field == fieldEnd || !field->isBitField() || + (getFieldBitOffset(*field) % charBits) == 0) && + "Installing but not at an aligned bitfield or limit"); + CharUnits accessSize = bestEndOffset - beginOffset; + if (!accessSize.isZero()) { + // Add the storage member for the access unit to the record. The + // bitfields get the offset of their storage but come afterward and + // remain there after a stable sort. + mlir::Type type; + if (bestClipped) { + assert(getSize(getUIntNType(astContext.toBits(accessSize))) > + accessSize && + "Clipped access need not be clipped"); + type = getByteArrayType(accessSize); + } else { + type = getUIntNType(astContext.toBits(accessSize)); + assert(getSize(type) == accessSize && + "Unclipped access must be clipped"); + } + members.push_back(makeStorageInfo(beginOffset, type)); + for (; begin != bestEnd; ++begin) + if (!begin->isZeroLengthBitField()) + members.push_back(MemberInfo( + beginOffset, MemberInfo::InfoKind::Field, nullptr, *begin)); + } + // Reset to start a new span. + field = bestEnd; + begin = fieldEnd; + } else { + assert(field != fieldEnd && field->isBitField() && + "Accumulating past end of bitfields"); + assert(!barrier && "Accumulating across barrier"); + // Accumulate this bitfield into the current (potential) span. + bitSizeSinceBegin += field->getBitWidthValue(); ++field; - continue; } - - // We've hit a break-point in the run and need to emit a storage field. - mlir::Type type = getBitfieldStorageType(tail - startBitOffset); - - // Add the storage member to the record and set the bitfield info for all of - // the bitfields in the run. Bitfields get the offset of their storage but - // come afterward and remain there after a stable sort. - members.push_back(makeStorageInfo(bitsToCharUnits(startBitOffset), type)); - for (; run != field; ++run) - members.push_back(MemberInfo(bitsToCharUnits(startBitOffset), - MemberInfo::InfoKind::Field, nullptr, *run)); - run = fieldEnd; } + + return field; } void CIRRecordLowering::accumulateFields() { @@ -382,7 +495,9 @@ void CIRRecordLowering::accumulateFields() { // Iterate to gather the list of bitfields. for (++field; field != fieldEnd && field->isBitField(); ++field) ; - accumulateBitFields(start, field); + field = accumulateBitFields(start, field); + assert((field == fieldEnd || !field->isBitField()) && + "Failed to accumulate all the bitfields"); } else if (!field->isZeroSize(astContext)) { members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(*field)), MemberInfo::InfoKind::Field, diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 551341ff20c00..d2d32bbd9403c 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -3,6 +3,43 @@ using namespace clang; using namespace clang::CIRGen; + +bool clang::CIRGen::isEmptyRecordForLayout(const ASTContext &context, + QualType t) { + const RecordType *rt = t->getAs<RecordType>(); + if (!rt) + return false; + + const RecordDecl *rd = rt->getDecl(); + + // If this is a C++ record, check the bases first. + if (const CXXRecordDecl *cxxrd = dyn_cast<CXXRecordDecl>(rd)) { + if (cxxrd->isDynamicClass()) + return false; + + for (const auto &I : cxxrd->bases()) + if (!isEmptyRecordForLayout(context, I.getType())) + return false; + } + + for (const auto *I : rd->fields()) + if (!isEmptyFieldForLayout(context, I)) + return false; + + return true; +} + +bool clang::CIRGen::isEmptyFieldForLayout(const ASTContext &context, + const FieldDecl *fd) { + if (fd->isZeroLengthBitField()) + return true; + + if (fd->isUnnamedBitField()) + return false; + + return isEmptyRecordForLayout(context, fd->getType()); +} + namespace { class X8664ABIInfo : public ABIInfo { diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index d31d1ee82d90a..a5c548aa2c7c4 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -22,6 +22,16 @@ namespace clang::CIRGen { +/// isEmptyFieldForLayout - Return true if the field is "empty", that is, +/// either a zero-width bit-field or an isEmptyRecordForLayout. +bool isEmptyFieldForLayout(const ASTContext &context, const FieldDecl *fd); + +/// isEmptyRecordForLayout - Return true if a structure contains only empty +/// base classes (per isEmptyRecordForLayout) and fields (per +/// isEmptyFieldForLayout). Note, C++ record fields are considered empty +/// if the [[no_unique_address]] attribute would have made them empty. +bool isEmptyRecordForLayout(const ASTContext &context, QualType t); + class TargetCIRGenInfo { std::unique_ptr<ABIInfo> info; diff --git a/clang/test/CIR/CodeGen/bitfields.c b/clang/test/CIR/CodeGen/bitfields.c new file mode 100644 index 0000000000000..a3827ed1e2fa1 --- /dev/null +++ b/clang/test/CIR/CodeGen/bitfields.c @@ -0,0 +1,79 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +typedef struct { + char a, b, c; + unsigned bits : 3; + unsigned more_bits : 4; + unsigned still_more_bits : 7; +} A; + +// CIR-DAG: !rec_A = !cir.record<struct "A" packed padded {!s8i, !s8i, !s8i, !u16i, !cir.array<!u8i x 3>}> +// LLVM-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }> +// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }> + +typedef struct { + int a : 4; + int b : 5; + int c; +} D; + +// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}> +// LLVM-DAG: %struct.D = type { i16, i32 } +// OGCG-DAG: %struct.D = type { i16, i32 } + +typedef struct { + int a : 4; + int b : 27; + int c : 17; + int d : 2; + int e : 15; + unsigned f; // type other than int above, not a bitfield +} S; +// CIR-DAG: !rec_S = !cir.record<struct "S" {!u64i, !u16i, !u32i}> +// LLVM-DAG: %struct.S = type { i64, i16, i32 } +// OGCG-DAG: %struct.S = type { i64, i16, i32 } + +typedef struct { + int a : 3; // one bitfield with size < 8 + unsigned b; +} T; + +// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}> +// LLVM-DAG: %struct.T = type { i8, i32 } +// OGCG-DAG: %struct.T = type { i8, i32 } + +typedef struct { + char a; + char b; + char c; + + // startOffset 24 bits, new storage from here + int d: 2; + int e: 2; + int f: 4; + int g: 25; + int h: 3; + int i: 4; + int j: 3; + int k: 8; + + int l: 14; // need to be a part of the new storage + // because (tail - startOffset) is 65 after 'l' field +} U; + +// CIR-DAG: !rec_U = !cir.record<struct "U" packed {!s8i, !s8i, !s8i, !u8i, !u64i}> +// LLVM-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }> +// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }> + +void def() { + A a; + D d; + S s; + T t; + U u; +} diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp index 3bd48ac4e09e8..762d249884741 100644 --- a/clang/test/CIR/CodeGen/bitfields.cpp +++ b/clang/test/CIR/CodeGen/bitfields.cpp @@ -5,27 +5,6 @@ // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll // RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG -struct A { - char a, b, c; - unsigned bits : 3; - unsigned more_bits : 4; - unsigned still_more_bits : 7; -}; - -// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}> -// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] } -// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }> - -typedef struct { - int a : 4; - int b : 5; - int c; -} D; - -// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}> -// LLVM-DAG: %struct.D = type { i16, i32 } -// OGCG-DAG: %struct.D = type { i16, i32 } - typedef struct { int a : 4; int b : 27; @@ -34,9 +13,8 @@ typedef struct { int e : 15; unsigned f; // type other than int above, not a bitfield } S; - -// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}> -// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 } +// CIR-DAG: !rec_S = !cir.record<struct "S" {!u64i, !u16i, !u32i}> +// LLVM-DAG: %struct.S = type { i64, i16, i32 } // OGCG-DAG: %struct.S = type { i64, i16, i32 } typedef struct { @@ -48,33 +26,7 @@ typedef struct { // LLVM-DAG: %struct.T = type { i8, i32 } // OGCG-DAG: %struct.T = type { i8, i32 } -typedef struct { - char a; - char b; - char c; - - // startOffset 24 bits, new storage from here - int d: 2; - int e: 2; - int f: 4; - int g: 25; - int h: 3; - int i: 4; - int j: 3; - int k: 8; - - int l: 14; // need to be a part of the new storage - // because (tail - startOffset) is 65 after 'l' field -} U; - -// CIR-DAG: !rec_U = !cir.record<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}> -// LLVM-DAG: %struct.U = type { i8, i8, i8, [9 x i8] } -// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }> - void def() { - A a; - D d; S s; T t; - U u; } >From 27eb4dd977139f5ef2dcd84bd5e440c049f64960 Mon Sep 17 00:00:00 2001 From: Andres Salamanca <andrealebarbari...@gmail.com> Date: Wed, 18 Jun 2025 15:27:43 -0500 Subject: [PATCH 6/6] Remove old comment --- clang/test/CIR/CodeGen/bitfields.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/test/CIR/CodeGen/bitfields.c b/clang/test/CIR/CodeGen/bitfields.c index a3827ed1e2fa1..ff5c6bc1787b4 100644 --- a/clang/test/CIR/CodeGen/bitfields.c +++ b/clang/test/CIR/CodeGen/bitfields.c @@ -62,8 +62,7 @@ typedef struct { int j: 3; int k: 8; - int l: 14; // need to be a part of the new storage - // because (tail - startOffset) is 65 after 'l' field + int l: 14; } U; // CIR-DAG: !rec_U = !cir.record<struct "U" packed {!s8i, !s8i, !s8i, !u8i, !u64i}> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits