This revision was automatically updated to reflect the committed changes. sammccall marked 2 inline comments as done. Closed by commit rGd19265b31e65: [clangd] Avoid wasteful data structures in RefSlab::Builder (authored by sammccall).
Changed prior to commit: https://reviews.llvm.org/D79950?vs=264024&id=264719#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D79950/new/ https://reviews.llvm.org/D79950 Files: clang-tools-extra/clangd/index/Ref.cpp clang-tools-extra/clangd/index/Ref.h clang-tools-extra/clangd/index/SymbolLocation.cpp clang-tools-extra/clangd/index/SymbolLocation.h
Index: clang-tools-extra/clangd/index/SymbolLocation.h =================================================================== --- clang-tools-extra/clangd/index/SymbolLocation.h +++ clang-tools-extra/clangd/index/SymbolLocation.h @@ -30,22 +30,23 @@ // Position is encoded into 32 bits to save space. // If Line/Column overflow, the value will be their maximum value. struct Position { - Position() : Line(0), Column(0) {} + Position() : LineColumnPacked(0) {} void setLine(uint32_t Line); - uint32_t line() const { return Line; } + uint32_t line() const { return LineColumnPacked >> ColumnBits; } void setColumn(uint32_t Column); - uint32_t column() const { return Column; } + uint32_t column() const { return LineColumnPacked & MaxColumn; } + uint32_t rep() const { return LineColumnPacked; } bool hasOverflow() const { - return Line >= MaxLine || Column >= MaxColumn; + return line() == MaxLine || column() == MaxColumn; } - static constexpr uint32_t MaxLine = (1 << 20) - 1; - static constexpr uint32_t MaxColumn = (1 << 12) - 1; + static constexpr unsigned ColumnBits = 12; + static constexpr uint32_t MaxLine = (1 << (32 - ColumnBits)) - 1; + static constexpr uint32_t MaxColumn = (1 << ColumnBits) - 1; private: - uint32_t Line : 20; // 0-based - uint32_t Column : 12; // 0-based + uint32_t LineColumnPacked; // Top 20 bit line, bottom 12 bits column. }; /// The symbol range, using half-open range [Start, End). Index: clang-tools-extra/clangd/index/SymbolLocation.cpp =================================================================== --- clang-tools-extra/clangd/index/SymbolLocation.cpp +++ clang-tools-extra/clangd/index/SymbolLocation.cpp @@ -15,18 +15,14 @@ constexpr uint32_t SymbolLocation::Position::MaxColumn; void SymbolLocation::Position::setLine(uint32_t L) { - if (L > MaxLine) { - Line = MaxLine; - return; - } - Line = L; + if (L > MaxLine) + L = MaxLine; + LineColumnPacked = (L << ColumnBits) | column(); } void SymbolLocation::Position::setColumn(uint32_t Col) { - if (Col > MaxColumn) { - Column = MaxColumn; - return; - } - Column = Col; + if (Col > MaxColumn) + Col = MaxColumn; + LineColumnPacked = (LineColumnPacked & ~MaxColumn) | Col; } llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolLocation &L) { Index: clang-tools-extra/clangd/index/Ref.h =================================================================== --- clang-tools-extra/clangd/index/Ref.h +++ clang-tools-extra/clangd/index/Ref.h @@ -13,6 +13,8 @@ #include "SymbolLocation.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" #include <cstdint> @@ -132,9 +134,17 @@ RefSlab build() &&; private: + // A ref we're storing with its symbol to consume with build(). + // All strings are interned, so DenseMapInfo can use pointer comparisons. + struct Entry { + SymbolID Symbol; + Ref Reference; + }; + friend struct llvm::DenseMapInfo<Entry>; + llvm::BumpPtrAllocator Arena; llvm::UniqueStringSaver UniqueStrings; // Contents on the arena. - llvm::DenseMap<SymbolID, std::set<Ref>> Refs; + llvm::DenseSet<Entry> Entries; }; private: @@ -151,4 +161,31 @@ } // namespace clangd } // namespace clang +namespace llvm { +template <> struct DenseMapInfo<clang::clangd::RefSlab::Builder::Entry> { + using Entry = clang::clangd::RefSlab::Builder::Entry; + static inline Entry getEmptyKey() { + static Entry E{clang::clangd::SymbolID(""), {}}; + return E; + } + static inline Entry getTombstoneKey() { + static Entry E{clang::clangd::SymbolID("TOMBSTONE"), {}}; + return E; + } + static unsigned getHashValue(const Entry &Val) { + return llvm::hash_combine( + Val.Symbol, reinterpret_cast<uintptr_t>(Val.Reference.Location.FileURI), + Val.Reference.Location.Start.rep(), Val.Reference.Location.End.rep()); + } + static bool isEqual(const Entry &LHS, const Entry &RHS) { + return std::tie(LHS.Symbol, LHS.Reference.Location.FileURI, + LHS.Reference.Kind) == + std::tie(RHS.Symbol, RHS.Reference.Location.FileURI, + RHS.Reference.Kind) && + LHS.Reference.Location.Start == RHS.Reference.Location.Start && + LHS.Reference.Location.End == RHS.Reference.Location.End; + } +}; +}; // namespace llvm + #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REF_H Index: clang-tools-extra/clangd/index/Ref.cpp =================================================================== --- clang-tools-extra/clangd/index/Ref.cpp +++ clang-tools-extra/clangd/index/Ref.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Ref.h" +#include "llvm/ADT/STLExtras.h" namespace clang { namespace clangd { @@ -33,27 +34,34 @@ } void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) { - auto &M = Refs[ID]; - if (M.count(S)) - return; - Ref R = S; - R.Location.FileURI = - UniqueStrings.save(R.Location.FileURI).data(); - M.insert(std::move(R)); + Entry E = {ID, S}; + E.Reference.Location.FileURI = UniqueStrings.save(S.Location.FileURI).data(); + Entries.insert(std::move(E)); } RefSlab RefSlab::Builder::build() && { - // We can reuse the arena, as it only has unique strings and we need them all. - // Reallocate refs on the arena to reduce waste and indirections when reading. std::vector<std::pair<SymbolID, llvm::ArrayRef<Ref>>> Result; - Result.reserve(Refs.size()); - size_t NumRefs = 0; - for (auto &Sym : Refs) { - std::vector<Ref> SymRefs(Sym.second.begin(), Sym.second.end()); - NumRefs += SymRefs.size(); - Result.emplace_back(Sym.first, llvm::ArrayRef<Ref>(SymRefs).copy(Arena)); + // We'll reuse the arena, as it only has unique strings and we need them all. + // We need to group refs by symbol and form contiguous arrays on the arena. + std::vector<std::pair<SymbolID, const Ref *>> Flat; + Flat.reserve(Entries.size()); + for (const Entry &E : Entries) + Flat.emplace_back(E.Symbol, &E.Reference); + // Group by SymbolID. + llvm::sort(Flat, llvm::less_first()); + std::vector<Ref> Refs; + // Loop over symbols, copying refs for each onto the arena. + for (auto I = Flat.begin(), End = Flat.end(); I != End;) { + SymbolID Sym = I->first; + Refs.clear(); + do { + Refs.push_back(*I->second); + ++I; + } while (I != End && I->first == Sym); + llvm::sort(Refs); // By file, affects xrefs display order. + Result.emplace_back(Sym, llvm::ArrayRef<Ref>(Refs).copy(Arena)); } - return RefSlab(std::move(Result), std::move(Arena), NumRefs); + return RefSlab(std::move(Result), std::move(Arena), Entries.size()); } } // namespace clangd
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits