================ @@ -439,82 +444,247 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset), MemberInfo::Field, nullptr, *Field)); } - return; + return Field; } - // Check if OffsetInRecord (the size in bits of the current run) is better - // as a single field run. When OffsetInRecord has legal integer width, and - // its bitfield offset is naturally aligned, it is better to make the - // bitfield a separate storage component so as it can be accessed directly - // with lower cost. - auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord, - uint64_t StartBitOffset) { - if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses) - return false; - if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) || - !DataLayout.fitsInLegalInteger(OffsetInRecord)) - return false; - // Make sure StartBitOffset is naturally aligned if it is treated as an - // IType integer. - if (StartBitOffset % - Context.toBits(getAlignment(getIntNType(OffsetInRecord))) != - 0) - return false; - return true; - }; + // The SysV ABI can overlap bitfield storage units with both other bitfield + // storage units /and/ other non-bitfield data members. Accessing a sequence + // of bitfields mustn't interfere with adjacent non-bitfields -- they're + // permitted to be accessed in separate threads for instance. + + // We split runs of bit-fields into a sequence of "access units". When we emit + // a load or store of a bit-field, we'll load/store the entire containing + // access unit. As mentioned, the standard requires that these loads and + // stores must not interfere with accesses to other memory locations, and it + // defines the bit-field's memory location as the current run of + // non-zero-width bit-fields. So an access unit must never overlap with + // non-bit-field storage or cross a zero-width bit-field. Otherwise, we're + // free to draw the lines as we see fit. + + // Drawing these lines well can be complicated. LLVM generally can't modify a + // program to access memory that it didn't before, so using very narrow access + // units can prevent the compiler from using optimal access patterns. For + // example, suppose a run of bit-fields occupies four bytes in a struct. If we + // split that into four 1-byte access units, then a sequence of assignments + // that doesn't touch all four bytes may have to be emitted with multiple + // 8-bit stores instead of a single 32-bit store. On the other hand, if we use + // very wide access units, we may find ourselves emitting accesses to + // bit-fields we didn't really need to touch, just because LLVM was unable to + // clean up after us. + + // It is desirable to have access units be aligned powers of 2 no larger than + // a register. (On non-strict alignment ISAs, the alignment requirement can be + // dropped.) A three byte access unit will be accessed using 2-byte and 1-byte + // accesses and bit manipulation. If no bitfield straddles across the two + // separate accesses, it is better to have separate 2-byte and 1-byte access + // units, as then LLVM will not generate unnecessary memory accesses, or bit + // manipulation. Similarly, on a strict-alignment architecture, it is better + // to keep access-units naturally aligned, to avoid similar bit + // manipulation synthesizing larger unaligned accesses. + + // Bitfields that share parts of a single byte are, of necessity, placed in + // the same access unit. That unit will encompass a consecutive run where + // adjacent bitfields share parts of a byte. (The first bitfield of such an + // access unit will start at the beginning of a byte.) + + // We then try and accumulate adjacent access units when the combined unit is + // naturally sized, no larger than a register, and (on a strict alignment + // ISA), naturally aligned. Note that this requires lookahead to one or more + // subsequent access units. For instance, consider a 2-byte access-unit + // followed by 2 1-byte units. We can merge that into a 4-byte access-unit, + // but we would not want to merge a 2-byte followed by a single 1-byte (and no + // available tail padding). We keep track of the best access unit seen so far, + // and use that when we determine we cannot accumulate any more. Then we start + // again at the bitfield following that best one. + + // The accumulation is also prevented when: + // *) it would cross a character-aigned zero-width bitfield, or + // *) fine-grained bitfield access option is in effect. + + CharUnits RegSize = + bitsToCharUnits(Context.getTargetInfo().getRegisterWidth()); + unsigned CharBits = Context.getCharWidth(); + + // Data about the start of the span we're accumulating to create an access + // unit from. Begin is the first bitfield of the span. If Begin is FieldEnd, + // we've not got a current span. The span starts at the BeginOffset character + // boundary. BitSizeSinceBegin is the size (in bits) of the span -- this might + // include padding when we've advanced to a subsequent bitfield run. + RecordDecl::field_iterator Begin = FieldEnd; + CharUnits BeginOffset; + uint64_t BitSizeSinceBegin; + + // The (non-inclusive) end of the largest acceptable access unit we've found + // since Begin. If this is Begin, we're gathering the initial set of bitfields + // of a new span. BestEndOffset is the end of that acceptable access unit -- + // it might extend beyond the last character of the bitfield run, using + // available padding characters. + RecordDecl::field_iterator BestEnd = Begin; + CharUnits BestEndOffset; - // The start field is better as a single field run. - bool StartFieldAsSingleRun = false; for (;;) { - // Check to see if we need to start a new run. - if (Run == FieldEnd) { - // If we're out of fields, return. - if (Field == FieldEnd) + // AtAlignedBoundary is true iff Field is the (potential) start of a new + // span (or the end of the bitfields). When true, LimitOffset is the + // character offset of that span and Barrier indicates whether the that new + // span cannot be merged into the current one. + bool AtAlignedBoundary = false; + bool Barrier = false; + + if (Field != FieldEnd && Field->isBitField()) { + uint64_t BitOffset = getFieldBitOffset(*Field); + if (Begin == FieldEnd) { + // Beginning a new span. + Begin = Field; + BestEnd = Begin; + + assert((BitOffset % CharBits) == 0 && "Not at start of char"); + BeginOffset = bitsToCharUnits(BitOffset); + BitSizeSinceBegin = 0; + } else if ((BitOffset % CharBits) != 0) { + // Bitfield occupies the same character as previous bitfield, it must be + // part of the same span. This can include zero-length bitfields, should + // the target not align them to character boundaries. Such non-alignment + // is at variance with the C++ std that requires zero-length bitfields ---------------- rjmccall wrote:
```suggestion // is at variance with the standards, which require zero-length bitfields ``` https://github.com/llvm/llvm-project/pull/65742 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits