================
@@ -439,82 +444,194 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
       Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset),
                                    MemberInfo::Field, nullptr, *Field));
     }
-    return;
+    return Field;
   }

-  // Check if OffsetInRecord (the size in bits of the current run) is better
-  // as a single field run. When OffsetInRecord has legal integer width, and
-  // its bitfield offset is naturally aligned, it is better to make the
-  // bitfield a separate storage component so as it can be accessed directly
-  // with lower cost.
-  auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord,
-                                      uint64_t StartBitOffset) {
-    if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
-      return false;
-    if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) ||
-        !DataLayout.fitsInLegalInteger(OffsetInRecord))
-      return false;
-    // Make sure StartBitOffset is naturally aligned if it is treated as an
-    // IType integer.
-    if (StartBitOffset %
-            Context.toBits(getAlignment(getIntNType(OffsetInRecord))) !=
-        0)
-      return false;
-    return true;
-  };
+  // The SysV ABI can overlap bitfield storage units with both other bitfield
+  // storage units /and/ other non-bitfield data members. Accessing a sequence
+  // of bitfields mustn't interfere with adjacent non-bitfields -- they're
+  // permitted to be accessed in separate threads for instance.
+
+  // We split runs of bit-fields into a sequence of "access units". When we emit
+  // a load or store of a bit-field, we'll load/store the entire containing
+  // access unit. As mentioned, the standard requires that these loads and
+  // stores must not interfere with accesses to other memory locations, and it
+  // defines the bit-field's memory location as the current run of
+  // non-zero-width bit-fields. So an access unit must never overlap with
+  // non-bit-field storage or cross a zero-width bit-field. Otherwise, we're
+  // free to draw the lines as we see fit.
+
+  // Drawing these lines well can be complicated. LLVM generally can't modify a
+  // program to access memory that it didn't before, so using very narrow access
+  // units can prevent the compiler from using optimal access patterns. For
+  // example, suppose a run of bit-fields occupies four bytes in a struct. If we
+  // split that into four 1-byte access units, then a sequence of assignments
+  // that doesn't touch all four bytes may have to be emitted with multiple
+  // 8-bit stores instead of a single 32-bit store. On the other hand, if we use
+  // very wide access units, we may find ourselves emitting accesses to
+  // bit-fields we didn't really need to touch, just because LLVM was unable to
+  // clean up after us.
+
+  // It is desirable to have access units be aligned powers of 2 no larger than
+  // a register. (On non-strict alignment ISAs, the alignment requirement can be
+  // dropped.) A three byte access unit will be accessed using 2-byte and 1-byte
+  // accesses and bit manipulation. If no bitfield straddles across the two
+  // separate accesses, it is better to have separate 2-byte and 1-byte access
+  // units, as then LLVM will not generate unnecessary memory accesses, or bit
+  // manipulation. Similarly, on a strict-alignment architecture, it is better
+  // to keep access-units naturally aligned, to avoid similar bit
+  // manipulation synthesizing larger unaligned accesses.
+
+  // We do this in two phases, processing a sequential run of bitfield
+  // declarations.
+
+  // a) Bitfields that share parts of a single byte are, of necessity, placed in
+  // the same access unit. That unit will encompass a consecutive
+  // run where adjacent bitfields share parts of a byte. (The first bitfield of
+  // such an access unit will start at the beginning of a byte.)
+
+  // b) Accumulate adjacent access units when the combined unit is naturally
+  // sized, no larger than a register, and on a strict alignment ISA,
+  // aligned. Note that this requires lookahead to one or more subsequent access
+  // units. For instance, consider a 2-byte access-unit followed by 2 1-byte
+  // units. We can merge that into a 4-byte access-unit, but we would not want
+  // to merge a 2-byte followed by a single 1-byte (and no available tail
+  // padding).
+
+  // This accumulation is prevented when:
+  //   *) it would cross a zero-width bitfield (ABI-dependent), or
+  //   *) one of the candidate access units contains a volatile bitfield, or
+  //   *) fine-grained bitfield access option is in effect.
+
+  CharUnits RegSize =
+      bitsToCharUnits(Context.getTargetInfo().getRegisterWidth());
+  unsigned CharBits = Context.getCharWidth();
+
+  RecordDecl::field_iterator Begin = FieldEnd;
+  CharUnits StartOffset;
+  uint64_t BitSize;
+  CharUnits BestEndOffset;
+  RecordDecl::field_iterator BestEnd = Begin;
+  bool Volatile;

-  // The start field is better as a single field run.
-  bool StartFieldAsSingleRun = false;
   for (;;) {
-    // Check to see if we need to start a new run.
-    if (Run == FieldEnd) {
-      // If we're out of fields, return.
-      if (Field == FieldEnd)
-        break;
-      // Any non-zero-length bitfield can start a new run.
-      if (!Field->isZeroLengthBitField(Context)) {
-        Run = Field;
-        StartBitOffset = getFieldBitOffset(*Field);
-        Tail = StartBitOffset + Field->getBitWidthValue(Context);
-        StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Tail - StartBitOffset,
-                                                         StartBitOffset);
+    CharUnits Limit;
+    bool Barrier;
+    bool Install = false;
+
+    if (Field != FieldEnd && Field->isBitField()) {
+      uint64_t BitOffset = getFieldBitOffset(*Field);
+      if (Begin == FieldEnd) {
+        // Beginning a new access unit.
+        Begin = Field;
+        BestEnd = Begin;
+
+        assert(!(BitOffset % CharBits) && "Not at start of char");
+        StartOffset = bitsToCharUnits(BitOffset);
+        BitSize = 0;
+        Volatile = false;
+      } else if (BitOffset % CharBits) {
+        // Bitfield occupies the same char as previous.
+        assert(BitOffset == Context.toBits(StartOffset) + BitSize &&
+               "Concatenating non-contiguous bitfields");
+      } else {
+        // Bitfield begins a new access unit.
+        Limit = bitsToCharUnits(BitOffset);
+        Barrier = false;
+        if (Field->isZeroLengthBitField(Context) &&
+            (Context.getTargetInfo().useZeroLengthBitfieldAlignment() ||
+             Context.getTargetInfo().useBitFieldTypeAlignment()))
+          Barrier = true;
+        Install = true;
       }
-      ++Field;
-      continue;
+    } else if (Begin == FieldEnd) {
+      // Completed the bitfields.
+      break;
+    } else {
+      // End of the bitfield span, with active access unit.
+      auto Probe = Field;
+      while (Probe != FieldEnd && Probe->isZeroSize(Context))
+        ++Probe;
+      // We can't necessarily use tail padding in C++ structs, so the NonVirtual
+      // size is what we must use there.
+      Limit = Probe != FieldEnd ? bitsToCharUnits(getFieldBitOffset(*Probe))
+              : RD ? Layout.getNonVirtualSize() : Layout.getDataSize();
+      Barrier = true;
+      Install = true;
    }

-    // If the start field of a new run is better as a single run, or
-    // if current field (or consecutive fields) is better as a single run, or
-    // if current field has zero width bitfield and either
-    // UseZeroLengthBitfieldAlignment or UseBitFieldTypeAlignment is set to
-    // true, or
-    // if the offset of current field is inconsistent with the offset of
-    // previous field plus its offset,
-    // skip the block below and go ahead to emit the storage.
-    // Otherwise, try to add bitfields to the run.
-    if (!StartFieldAsSingleRun && Field != FieldEnd &&
-        !IsBetterAsSingleFieldRun(Tail - StartBitOffset, StartBitOffset) &&
-        (!Field->isZeroLengthBitField(Context) ||
-         (!Context.getTargetInfo().useZeroLengthBitfieldAlignment() &&
-          !Context.getTargetInfo().useBitFieldTypeAlignment())) &&
-        Tail == getFieldBitOffset(*Field)) {
-      Tail += Field->getBitWidthValue(Context);
-      ++Field;
-      continue;
+    if (Install) {
+      // Found the start of a new access unit. Determine if that completes the
+      // current one, or potentially extends it.
+      Install = false;
+      CharUnits Size = bitsToCharUnits(BitSize + CharBits - 1);
+      if (BestEnd == Begin) {
+        // This is the initial access unit.
+        BestEnd = Field;
+        BestEndOffset = StartOffset + Size;
+        if (!BitSize || Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
+          // A barrier, or we're fine grained.
+          Install = true;
+      } else if (Size > RegSize || Volatile)
+        // Too big to accumulate, or just-seen access unit contains a volatile.
+        Install = true;
+
+      if (!Install) {
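As a concrete illustration of the two-phase scheme the new comments describe,
here is a minimal standalone sketch. It is not the patch's implementation: the
real code walks RecordDecl::field_iterator with CharUnits and also handles
volatile bit-fields, zero-width barriers, non-bit-field neighbors, and C++
tail padding. The BitField and AccessUnit types and the formUnits/mergeUnits
helpers are invented for illustration, assuming 8-bit chars, nonzero-width
fields given in layout order, and a strict-alignment target with 4-byte
registers; the Aligned test below corresponds to the well-aligned check
discussed in the review comment that follows.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

struct BitField { uint64_t BitOffset, BitWidth; }; // hypothetical stand-in
struct AccessUnit { uint64_t StartByte, SizeInBytes; };

// Phase a): bitfields sharing any part of a byte must land in the same
// access unit; each unit therefore starts on a byte boundary.
static std::vector<AccessUnit> formUnits(const std::vector<BitField> &Run) {
  std::vector<AccessUnit> Units;
  for (const BitField &F : Run) {
    uint64_t FirstByte = F.BitOffset / 8;
    uint64_t LastByte = (F.BitOffset + F.BitWidth - 1) / 8;
    if (!Units.empty() &&
        FirstByte < Units.back().StartByte + Units.back().SizeInBytes)
      // Shares a byte with the previous unit: grow that unit.
      Units.back().SizeInBytes = LastByte + 1 - Units.back().StartByte;
    else
      Units.push_back({FirstByte, LastByte + 1 - FirstByte});
  }
  return Units;
}

// Phase b): scan forward from each unit (the lookahead the comments call out,
// mirroring the Begin/BestEnd bookkeeping in the patch) and commit the longest
// adjacent span whose combined extent is a naturally aligned power of two no
// larger than a register.
static std::vector<AccessUnit> mergeUnits(const std::vector<AccessUnit> &Units,
                                          uint64_t RegBytes) {
  std::vector<AccessUnit> Merged;
  for (size_t I = 0; I != Units.size();) {
    uint64_t Start = Units[I].StartByte;
    uint64_t Size = Units[I].SizeInBytes;
    size_t BestEnd = I + 1; // a unit by itself is always valid
    for (size_t J = I + 1; J != Units.size(); ++J) {
      if (Units[J].StartByte != Start + Size)
        break; // gap between units: cannot extend across it
      Size += Units[J].SizeInBytes;
      if (Size > RegBytes)
        break; // too big to accumulate
      bool PowerOf2 = (Size & (Size - 1)) == 0;
      bool Aligned = Start % Size == 0; // strict-alignment ISA requirement
      if (PowerOf2 && Aligned)
        BestEnd = J + 1; // best extension seen so far
    }
    uint64_t End =
        Units[BestEnd - 1].StartByte + Units[BestEnd - 1].SizeInBytes;
    Merged.push_back({Start, End - Start});
    I = BestEnd;
  }
  return Merged;
}

int main() {
  // struct { unsigned a : 16; unsigned b : 8; unsigned c : 8; }
  std::vector<BitField> Run = {{0, 16}, {16, 8}, {24, 8}};
  for (AccessUnit U : mergeUnits(formUnits(Run), /*RegBytes=*/4))
    std::printf("unit at byte %llu, %llu byte(s)\n",
                (unsigned long long)U.StartByte,
                (unsigned long long)U.SizeInBytes);
  // Prints one 4-byte unit: the 2-byte unit absorbs both 1-byte units,
  // matching the lookahead example in the comments. Dropping field c
  // instead leaves separate 2-byte and 1-byte units.
}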
----------------

rjmccall wrote:

Comments on these blocks, please. Here I think you're saying: if the target
doesn't have cheap unaligned accesses, check whether LLVM would consider the
entire access unit since `Begin` well-aligned. If not, emit `Best` without
adding anything else -- either `Best` is the original access unit and is
already misaligned, or `Best` was previously well-aligned (since we didn't
emit it) but adding the new bit-field would misalign it.

https://github.com/llvm/llvm-project/pull/65742
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits