sammccall updated this revision to Diff 127918.
sammccall added a comment.

minor doc and code layout tweaks


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D41506

Files:
  clangd/index/FileIndex.cpp
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/SymbolCollector.cpp
  clangd/index/SymbolCollector.h
  clangd/index/SymbolYAML.cpp
  unittests/clangd/CodeCompleteTests.cpp
  unittests/clangd/FileIndexTests.cpp
  unittests/clangd/IndexTests.cpp
  unittests/clangd/SymbolCollectorTests.cpp

Index: unittests/clangd/SymbolCollectorTests.cpp
===================================================================
--- unittests/clangd/SymbolCollectorTests.cpp
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -32,8 +32,7 @@
 
 // GMock helpers for matching Symbol.
 MATCHER_P(QName, Name, "") {
-  return (arg.second.Scope + (arg.second.Scope.empty() ? "" : "::") +
-          arg.second.Name).str() == Name;
+  return (arg.Scope + (arg.Scope.empty() ? "" : "::") + arg.Name).str() == Name;
 }
 
 namespace clang {
Index: unittests/clangd/IndexTests.cpp
===================================================================
--- unittests/clangd/IndexTests.cpp
+++ unittests/clangd/IndexTests.cpp
@@ -45,18 +45,18 @@
 std::shared_ptr<std::vector<const Symbol *>>
 generateSymbols(std::vector<std::string> QualifiedNames,
                 std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  auto Slab = std::make_shared<SlabAndPointers>();
-  if (WeakSymbols)
-    *WeakSymbols = Slab;
-
+  SymbolSlab::Builder Slab;
   for (llvm::StringRef QName : QualifiedNames)
-    Slab->Slab.insert(symbol(QName));
+    Slab.insert(symbol(QName));
 
-  for (const auto &Sym : Slab->Slab)
-    Slab->Pointers.push_back(&Sym.second);
-
-  auto *Pointers = &Slab->Pointers;
-  return {std::move(Slab), Pointers};
+  auto Storage = std::make_shared<SlabAndPointers>();
+  Storage->Slab = std::move(Slab).build();
+  for (const auto &Sym : Storage->Slab)
+    Storage->Pointers.push_back(&Sym);
+  if (WeakSymbols)
+    *WeakSymbols = Storage;
+  auto *Pointers = &Storage->Pointers;
+  return {std::move(Storage), Pointers};
 }
 
 // Create a slab of symbols with IDs and names [Begin, End], otherwise identical
Index: unittests/clangd/FileIndexTests.cpp
===================================================================
--- unittests/clangd/FileIndexTests.cpp
+++ unittests/clangd/FileIndexTests.cpp
@@ -28,9 +28,11 @@
   return Sym;
 }
 
-void addNumSymbolsToSlab(int Begin, int End, SymbolSlab *Slab) {
+std::unique_ptr<SymbolSlab> numSlab(int Begin, int End) {
+  SymbolSlab::Builder Slab;
   for (int i = Begin; i <= End; i++)
-    Slab->insert(symbol(std::to_string(i)));
+    Slab.insert(symbol(std::to_string(i)));
+  return llvm::make_unique<SymbolSlab>(std::move(Slab).build());
 }
 
 std::vector<std::string>
@@ -45,46 +47,29 @@
   FileSymbols FS;
   EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre());
 
-  auto Slab = llvm::make_unique<SymbolSlab>();
-  addNumSymbolsToSlab(1, 3, Slab.get());
-
-  FS.update("f1", std::move(Slab));
-
+  FS.update("f1", numSlab(1, 3));
   EXPECT_THAT(getSymbolNames(*FS.allSymbols()),
               UnorderedElementsAre("1", "2", "3"));
 }
 
 TEST(FileSymbolsTest, Overlap) {
   FileSymbols FS;
-
-  auto Slab = llvm::make_unique<SymbolSlab>();
-  addNumSymbolsToSlab(1, 3, Slab.get());
-
-  FS.update("f1", std::move(Slab));
-
-  Slab = llvm::make_unique<SymbolSlab>();
-  addNumSymbolsToSlab(3, 5, Slab.get());
-
-  FS.update("f2", std::move(Slab));
-
+  FS.update("f1", numSlab(1, 3));
+  FS.update("f2", numSlab(3, 5));
   EXPECT_THAT(getSymbolNames(*FS.allSymbols()),
               UnorderedElementsAre("1", "2", "3", "3", "4", "5"));
 }
 
 TEST(FileSymbolsTest, SnapshotAliveAfterRemove) {
   FileSymbols FS;
 
-  auto Slab = llvm::make_unique<SymbolSlab>();
-  addNumSymbolsToSlab(1, 3, Slab.get());
-
-  FS.update("f1", std::move(Slab));
+  FS.update("f1", numSlab(1, 3));
 
   auto Symbols = FS.allSymbols();
   EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3"));
 
   FS.update("f1", nullptr);
   EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre());
-
   EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3"));
 }
 
Index: unittests/clangd/CodeCompleteTests.cpp
===================================================================
--- unittests/clangd/CodeCompleteTests.cpp
+++ unittests/clangd/CodeCompleteTests.cpp
@@ -449,6 +449,7 @@
     std::vector<const Symbol *> Pointers;
   };
   auto Snap = std::make_shared<Snapshot>();
+  SymbolSlab::Builder Slab;
   for (const auto &Pair : Symbols) {
     Symbol Sym;
     Sym.ID = SymbolID(Pair.first);
@@ -462,10 +463,11 @@
       Sym.Scope = QName.substr(0, Pos);
     }
     Sym.SymInfo.Kind = Pair.second;
-    Snap->Slab.insert(std::move(Sym));
+    Slab.insert(Sym);
   }
+  Snap->Slab = std::move(Slab).build();
   for (auto &Iter : Snap->Slab)
-    Snap->Pointers.push_back(&Iter.second);
+    Snap->Pointers.push_back(&Iter);
   auto S = std::shared_ptr<std::vector<const Symbol *>>(std::move(Snap),
                                                         &Snap->Pointers);
   I->build(std::move(S));
Index: clangd/index/SymbolYAML.cpp
===================================================================
--- clangd/index/SymbolYAML.cpp
+++ clangd/index/SymbolYAML.cpp
@@ -127,20 +127,18 @@
   std::vector<Symbol> S;
   llvm::yaml::Input Yin(YAMLContent);
   Yin >> S;
-  SymbolSlab Syms;
+  SymbolSlab::Builder Syms;
   for (auto& Sym : S)
-    Syms.insert(std::move(Sym));
-  return Syms;
+    Syms.insert(Sym);
+  return std::move(Syms).build();
 }
 
 std::string SymbolToYAML(const SymbolSlab& Symbols) {
   std::string Str;
   llvm::raw_string_ostream OS(Str);
   llvm::yaml::Output Yout(OS);
-  for (auto &Pair : Symbols) {
-    Symbol MutableSymbol = Pair.second;
-    Yout<< MutableSymbol;
-  }
+  for (Symbol S : Symbols) // copy: Yout<< requires mutability.
+    Yout<< S;
   return OS.str();
 }
 
Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -29,13 +29,11 @@
                       unsigned Offset,
                       index::IndexDataConsumer::ASTNodeInfo ASTNode) override;
 
-  void finish() override;
-
-  SymbolSlab takeSymbols() { return std::move(Symbols); }
+  SymbolSlab takeSymbols() { return std::move(Symbols).build(); }
 
 private:
   // All Symbols collected from the AST.
-  SymbolSlab Symbols;
+  SymbolSlab::Builder Symbols;
 };
 
 } // namespace clangd
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -91,7 +91,7 @@
 
     std::string USR(Buff.data(), Buff.size());
     auto ID = SymbolID(USR);
-    if (Symbols.find(ID) != Symbols.end())
+    if (Symbols.find(ID) != nullptr)
       return true;
 
     auto &SM = ND->getASTContext().getSourceManager();
@@ -107,7 +107,5 @@
   return true;
 }
 
-void SymbolCollector::finish() { Symbols.freeze(); }
-
 } // namespace clangd
 } // namespace clang
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -13,9 +13,9 @@
 #include "../Context.h"
 #include "clang/Index/IndexSymbol.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSet.h"
 #include <array>
 #include <string>
 
@@ -49,6 +49,9 @@
   bool operator==(const SymbolID &Sym) const {
     return HashValue == Sym.HashValue;
   }
+  bool operator<(const SymbolID &Sym) const {
+    return HashValue < Sym.HashValue;
+  }
 
 private:
   friend llvm::hash_code hash_value(const SymbolID &ID) {
@@ -70,13 +73,38 @@
 // "<<" operator.
 void operator>>(llvm::StringRef HexStr, SymbolID &ID);
 
+} // namespace clangd
+} // namespace clang
+namespace llvm {
+// Support SymbolIDs as DenseMap keys.
+template <> struct DenseMapInfo<clang::clangd::SymbolID> {
+  static inline clang::clangd::SymbolID getEmptyKey() {
+    static clang::clangd::SymbolID EmptyKey("EMPTYKEY");
+    return EmptyKey;
+  }
+  static inline clang::clangd::SymbolID getTombstoneKey() {
+    static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
+    return TombstoneKey;
+  }
+  static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
+    return hash_value(Sym);
+  }
+  static bool isEqual(const clang::clangd::SymbolID &LHS,
+                      const clang::clangd::SymbolID &RHS) {
+    return LHS == RHS;
+  }
+};
+} // namespace llvm
+namespace clang {
+namespace clangd {
+
 // The class presents a C++ symbol, e.g. class, function.
 //
 // WARNING: Symbols do not own much of their underlying data - typically strings
 // are owned by a SymbolSlab. They should be treated as non-owning references.
 // Copies are shallow.
 // When adding new unowned data fields to Symbol, remember to update
-// SymbolSlab::insert to copy them to the slab's storage.
+// SymbolSlab::Builder in Index.cpp to copy them to the slab's storage.
 struct Symbol {
   // The ID of the symbol.
   SymbolID ID;
@@ -102,38 +130,56 @@
   // FIXME: add code completion information.
 };
 
-// A symbol container that stores a set of symbols. The container will maintain
-// the lifetime of the symbols.
+// An immutable symbol container that stores a set of symbols.
+// The container will maintain the lifetime of the symbols.
 class SymbolSlab {
 public:
-  using const_iterator = llvm::DenseMap<SymbolID, Symbol>::const_iterator;
+  using const_iterator = std::vector<Symbol>::const_iterator;
 
   SymbolSlab() = default;
 
   const_iterator begin() const;
   const_iterator end() const;
   const_iterator find(const SymbolID &SymID) const;
 
-  // Once called, no more symbols would be added to the SymbolSlab. This
-  // operation is irreversible.
-  void freeze();
+  size_t size() const { return Symbols.size(); }
+  // Estimates the total memory usage.
+  size_t bytes() const {
+    return sizeof(*this) + Arena.getTotalMemory() +
+           Symbols.capacity() * sizeof(Symbol);
+  }
 
-  // Adds the symbol to this slab.
-  // This is a deep copy: underlying strings will be owned by the slab.
-  void insert(const Symbol& S);
+  // SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab.
+  // The frozen SymbolSlab will use less memory.
+  class Builder {
+   public:
+     // Adds a symbol, overwriting any existing one with the same ID.
+     // This is a deep copy: underlying strings will be owned by the slab.
+     void insert(const Symbol& S);
+
+     // Returns the symbol with an ID, if it exists. Valid until next insert().
+     const Symbol* find(const SymbolID &ID) {
+       auto I = SymbolIndex.find(ID);
+       return I == SymbolIndex.end() ? nullptr : &Symbols[I->second];
+     }
+
+     // Consumes the builder to finalize the slab.
+     SymbolSlab build() &&;
+
+   private:
+     llvm::BumpPtrAllocator Arena;
+     // Intern table for strings. Contents are on the arena.
+     llvm::DenseSet<llvm::StringRef> Strings;
+     std::vector<Symbol> Symbols;
+     llvm::DenseMap<SymbolID, size_t> SymbolIndex;
+  };
 
 private:
-  // Replaces S with a reference to the same string, owned by this slab.
-  void intern(llvm::StringRef &S) {
-    S = S.empty() ? llvm::StringRef() : Strings.insert(S).first->getKey();
-  }
-
-  bool Frozen = false;
+  SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
+      : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
 
-  // Intern table for strings. Not StringPool as we don't refcount, just insert.
-  // We use BumpPtrAllocator to avoid lots of tiny allocations for nodes.
-  llvm::StringSet<llvm::BumpPtrAllocator> Strings;
-  llvm::DenseMap<SymbolID, Symbol> Symbols;
+  llvm::BumpPtrAllocator Arena;
+  std::vector<Symbol> Symbols;
 };
 
 struct FuzzyFindRequest {
@@ -174,27 +220,4 @@
 
 } // namespace clangd
 } // namespace clang
-
-namespace llvm {
-
-template <> struct DenseMapInfo<clang::clangd::SymbolID> {
-  static inline clang::clangd::SymbolID getEmptyKey() {
-    static clang::clangd::SymbolID EmptyKey("EMPTYKEY");
-    return EmptyKey;
-  }
-  static inline clang::clangd::SymbolID getTombstoneKey() {
-    static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
-    return TombstoneKey;
-  }
-  static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
-    return hash_value(Sym);
-  }
-  static bool isEqual(const clang::clangd::SymbolID &LHS,
-                      const clang::clangd::SymbolID &RHS) {
-    return LHS == RHS;
-  }
-};
-
-} // namespace llvm
-
 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -13,16 +13,17 @@
 
 namespace clang {
 namespace clangd {
+using namespace llvm;
 
-SymbolID::SymbolID(llvm::StringRef USR)
-    : HashValue(llvm::SHA1::hash(arrayRefFromStringRef(USR))) {}
+SymbolID::SymbolID(StringRef USR)
+    : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {}
 
-llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID) {
-  OS << toHex(llvm::toStringRef(ID.HashValue));
+raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) {
+  OS << toHex(toStringRef(ID.HashValue));
   return OS;
 }
 
-void operator>>(llvm::StringRef Str, SymbolID &ID) {
+void operator>>(StringRef Str, SymbolID &ID) {
   std::string HexString = fromHex(Str);
   assert(HexString.size() == 20);
   std::copy(HexString.begin(), HexString.end(), ID.HashValue.begin());
@@ -32,23 +33,56 @@
 
 SymbolSlab::const_iterator SymbolSlab::end() const { return Symbols.end(); }
 
-SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &SymID) const {
-  return Symbols.find(SymID);
+SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
+  auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID,
+                             [](const Symbol &S, const SymbolID &I) {
+                               return S.ID == I;
+                             });
+  if (It != Symbols.end() || It->ID == ID)
+    return It;
+  return Symbols.end();
 }
 
-void SymbolSlab::freeze() { Frozen = true; }
+// Copy the underlying data of the symbol into the owned arena.
+static void own(Symbol &S, DenseSet<StringRef> &Strings,
+                BumpPtrAllocator &Arena) {
+  // Intern replaces V with a reference to the same string owned by the arena.
+  auto Intern = [&](StringRef &V) {
+    auto R = Strings.insert(V);
+    if (R.second) { // New entry added to the table, copy the string.
+      char *Data = Arena.Allocate<char>(V.size());
+      memcpy(Data, V.data(), V.size());
+      *R.first = StringRef(Data, V.size());
+    }
+    V = *R.first;
+  };
 
-void SymbolSlab::insert(const Symbol &S) {
-  assert(!Frozen && "Can't insert a symbol after the slab has been frozen!");
-  auto ItInserted = Symbols.try_emplace(S.ID, S);
-  if (!ItInserted.second)
-    return;
-  auto &Sym = ItInserted.first->second;
+  // We need to copy every StringRef field onto the arena.
+  Intern(S.Name);
+  Intern(S.Scope);
+  Intern(S.CanonicalDeclaration.FilePath);
+}
+
+void SymbolSlab::Builder::insert(const Symbol &S) {
+  auto R = SymbolIndex.try_emplace(S.ID, Symbols.size());
+  if (R.second) {
+    Symbols.push_back(S);
+    own(Symbols.back(), Strings, Arena);
+  } else
+    own(Symbols[R.first->second] = S, Strings, Arena);
+}
 
-  // We inserted a new symbol, so copy the underlying data.
-  intern(Sym.Name);
-  intern(Sym.Scope);
-  intern(Sym.CanonicalDeclaration.FilePath);
+SymbolSlab SymbolSlab::Builder::build() && {
+  Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit.
+  // Sort symbols so the slab can binary search over them.
+  std::sort(Symbols.begin(), Symbols.end(),
+            [](const Symbol &L, const Symbol &R) { return L.ID < R.ID; });
+  // We may have unused strings from overwritten symbols. Build a new arena.
+  BumpPtrAllocator Arena;
+  DenseSet<StringRef> Strings;
+  for (auto &S : Symbols)
+    own(S, Strings, Arena);
+  return SymbolSlab(std::move(Arena), std::move(Symbols));
 }
 
 } // namespace clangd
Index: clangd/index/FileIndex.cpp
===================================================================
--- clangd/index/FileIndex.cpp
+++ clangd/index/FileIndex.cpp
@@ -54,7 +54,7 @@
     for (const auto &FileAndSlab : FileToSlabs) {
       Snap->KeepAlive.push_back(FileAndSlab.second);
       for (const auto &Iter : *FileAndSlab.second)
-        Snap->Pointers.push_back(&Iter.second);
+        Snap->Pointers.push_back(&Iter);
     }
   }
   auto *Pointers = &Snap->Pointers;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to